4.2.10. 评估算术操作的代价
4.2.10.1. 创建伪函数上下文
回到backend_init,接下来是init_dummy_function_start。它初始化了rtl的展开机制,因而我们可以进行一些简单序列的生成。
6518 void
6519 init_dummy_function_start (void) in function.c
6520 {
6521 prepare_function_start (NULL);
6522 }
在这里调用了prepare_function_start来创建伪函数上下文cfun,因为下面的初始化函数需要产生rtl(在正式的代码编译中,只有完成代码解析及前端的优化后,才会生成rtl的表示形式,并交予后端)。
6482 static void
6483 prepare_function_start (tree fndecl) in function.c
6484 {
6485 if (fndecl && DECL_SAVED_INSNS (fndecl))
6486 cfun = DECL_SAVED_INSNS (fndecl);
6487 else
6488 allocate_struct_function (fndecl);
6489 init_emit ();
6490 init_varasm_status (cfun);
6491 init_expr ();
6492
6493 cse_not_expected = ! optimize;
6494
6495 /* Caller save not needed yet. */
6496 caller_save_needed = 0;
6497
6498 /* We haven't done register allocation yet. */
6499 reg_renumber = 0;
6500
6501 /* Indicate that we need to distinguish between the return value of the
6502 present function and the return value of a function being called. */
6503 rtx_equal_function_value_matters = 1;
6504
6505 /* Indicate that we have not instantiated virtual registers yet. */
6506 virtuals_instantiated = 0;
6507
6508 /* Indicate that we want CONCATs now. */
6509 generating_concat_p = 1;
6510
6511 /* Indicate we have no need of a frame pointer yet. */
6512 frame_pointer_needed = 0;
6513 }
结构体function保存了描述当前被编译函数状态的所有重要的全局及静态变量。
176 struct function GTY(()) in function.h
177 {
178 struct eh_status *eh;
179 struct stmt_status *stmt;
180 struct expr_status *expr;
181 struct emit_status *emit;
182 struct varasm_status *varasm;
183
184 /* For function.c. */
185
186 /* Points to the FUNCTION_DECL of this function. */
187 tree decl;
188
189 /* Function containing this function, if any. */
190 struct function *outer;
191
192 /* Number of bytes of args popped by function being compiled on its return.
193 Zero if no bytes are to be popped.
194 May affect compilation of return insn or of function epilogue. */
195 int pops_args;
196
197 /* If function's args have a fixed size, this is that size, in bytes.
198 Otherwise, it is -1.
199 May affect compilation of return insn or of function epilogue. */
200 int args_size;
201
202 /* # bytes the prologue should push and pretend that the caller pushed them.
203 The prologue must do this, but only if parms can be passed in
204 registers. */
205 int pretend_args_size;
206
207 /* # of bytes of outgoing arguments. If ACCUMULATE_OUTGOING_ARGS is
208 defined, the needed space is pushed by the prologue. */
209 int outgoing_args_size;
210
211 /* This is the offset from the arg pointer to the place where the first
212 anonymous arg can be found, if there is one. */
213 rtx arg_offset_rtx;
214
215 /* Quantities of various kinds of registers
216 used for the current function's args. */
217 CUMULATIVE_ARGS args_info;
218
219 /* If nonzero, an RTL expression for the location at which the current
220 function returns its result. If the current function returns its
221 result in a register, current_function_return_rtx will always be
222 the hard register containing the result. */
223 rtx return_rtx;
224
225 /* The arg pointer hard register, or the pseudo into which it was copied. */
226 rtx internal_arg_pointer;
227
228 /* Language-specific reason why the current function cannot be made
229 inline. */
230 const char *cannot_inline;
231
232 /* Opaque pointer used by get_hard_reg_initial_val and
233 has_hard_reg_initial_val (see integrate.[hc]). */
234 struct initial_value_struct *hard_reg_initial_vals;
235
236 /* Number of function calls seen so far in current function. */
237 int x_function_call_count;
238
239 /* List (chain of TREE_LIST) of LABEL_DECLs for all nonlocal labels
240 (labels to which there can be nonlocal gotos from nested functions)
241 in this function. */
242 tree x_nonlocal_labels;
243
244 /* List (chain of EXPR_LIST) of stack slots that hold the current handlers
245 for nonlocal gotos. There is one for every nonlocal label in the
246 function; this list matches the one in nonlocal_labels.
247 Zero when function does not have nonlocal labels. */
248 rtx x_nonlocal_goto_handler_slots;
249
250 /* List (chain of EXPR_LIST) of labels heading the current handlers for
251 nonlocal gotos. */
252 rtx x_nonlocal_goto_handler_labels;
253
254 /* RTX for stack slot that holds the stack pointer value to restore
255 for a nonlocal goto.
256 Zero when function does not have nonlocal labels. */
257 rtx x_nonlocal_goto_stack_level;
258
259 /* Label that will go on parm cleanup code, if any.
260 Jumping to this label runs cleanup code for parameters, if
261 such code must be run. Following this code is the logical return
262 label. */
263 rtx x_cleanup_label;
264
265 /* Label that will go on function epilogue.
266 Jumping to this label serves as a "return" instruction
267 on machines which require execution of the epilogue on all returns. */
268 rtx x_return_label;
269
270 /* Label that will go on the end of function epilogue.
271 Jumping to this label serves as a "naked return" instruction
272 on machines which require execution of the epilogue on all returns. */
273 rtx x_naked_return_label;
274
275 /* Label and register for unswitching computed gotos. */
276 rtx computed_goto_common_label;
277 rtx computed_goto_common_reg;
278
279 /* List (chain of EXPR_LISTs) of pseudo-regs of SAVE_EXPRs.
280 So we can mark them all live at the end of the function, if nonopt. */
281 rtx x_save_expr_regs;
282
283 /* List (chain of EXPR_LISTs) of all stack slots in this function.
284 Made for the sake of unshare_all_rtl. */
285 rtx x_stack_slot_list;
286
287 /* Chain of all RTL_EXPRs that have insns in them. */
288 tree x_rtl_expr_chain;
289
290 /* Label to jump back to for tail recursion, or 0 if we have
291 not yet needed one for this function. */
292 rtx x_tail_recursion_label;
293
294 /* Place after which to insert the tail_recursion_label if we need one. */
295 rtx x_tail_recursion_reentry;
296
297 /* Location at which to save the argument pointer if it will need to be
298 referenced. There are two cases where this is done: if nonlocal gotos
299 exist, or if vars stored at an offset from the argument pointer will be
300 needed by inner routines. */
301 rtx x_arg_pointer_save_area;
302
303 /* If the function returns non-void, we will emit a clobber of the
304 return registers just in case the user fell off the end without
305 returning a proper value. This is that insn. */
306 rtx x_clobber_return_insn;
307
308 /* Offset to end of allocated area of stack frame.
309 If stack grows down, this is the address of the last stack slot allocated.
310 If stack grows up, this is the address for the next slot. */
311 HOST_WIDE_INT x_frame_offset;
312
313 /* List (chain of TREE_LISTs) of static chains for containing functions.
314 Each link has a FUNCTION_DECL in the TREE_PURPOSE and a reg rtx
315 in an RTL_EXPR in the TREE_VALUE. */
316 tree x_context_display;
317
318 /* List (chain of TREE_LISTs) of trampolines for nested functions.
319 The trampoline sets up the static chain and jumps to the function.
320 We supply the trampoline's address when the function's address is
321 requested.
322
323 Each link has a FUNCTION_DECL in the TREE_PURPOSE and a reg rtx
324 in an RTL_EXPR in the TREE_VALUE. */
325 tree x_trampoline_list;
326
327 /* Insn after which register parms and SAVE_EXPRs are born, if nonopt. */
328 rtx x_parm_birth_insn;
329
330 /* Last insn of those whose job was to put parms into their nominal
331 homes. */
332 rtx x_last_parm_insn;
333
334 /* 1 + last pseudo register number possibly used for loading a copy
335 of a parameter of this function. */
336 unsigned int x_max_parm_reg;
337
338 /* Vector indexed by REGNO, containing location on stack in which
339 to put the parm which is nominally in pseudo register REGNO,
340 if we discover that that parm must go in the stack. The highest
341 element in this vector is one less than MAX_PARM_REG, above. */
342 rtx * GTY ((length ("%h.x_max_parm_reg"))) x_parm_reg_stack_loc;
343
344 /* List of all temporaries allocated, both available and in use. */
345 struct temp_slot *x_temp_slots;
346
347 /* Current nesting level for temporaries. */
348 int x_temp_slot_level;
349
350 /* Current nesting level for variables in a block. */
351 int x_var_temp_slot_level;
352
353 /* When temporaries are created by TARGET_EXPRs, they are created at
354 this level of temp_slot_level, so that they can remain allocated
355 until no longer needed. CLEANUP_POINT_EXPRs define the lifetime
356 of TARGET_EXPRs. */
357 int x_target_temp_slot_level;
358
359 /* This slot is initialized as 0 and is added to
360 during the nested function. */
361 struct var_refs_queue *fixup_var_refs_queue;
362
363 /* For integrate.c. */
364 int inlinable;
365 int no_debugging_symbols;
366 rtvec original_arg_vector;
367 tree original_decl_initial;
368 /* Last insn of those whose job was to put parms into their nominal
369 homes. */
370 rtx inl_last_parm_insn;
371 /* Highest label number in current function. */
372 int inl_max_label_num;
373
374 /* Function sequence number for profiling, debugging, etc. */
375 int funcdef_no;
376
377 /* For md files. */
378
379 /* tm.h can use this to store whatever it likes. */
380 struct machine_function * GTY ((maybe_undef (""))) machine;
381 /* The largest alignment of slot allocated on the stack. */
382 int stack_alignment_needed;
383 /* Preferred alignment of the end of stack frame. */
384 int preferred_stack_boundary;
385 /* Set when the call to function itself has been emit. */
386 bool recursive_call_emit;
387
388 /* Language-specific code can use this to store whatever it likes. */
389 struct language_function * language;
390
391 /* For reorg. */
392
393 /* If some insns can be deferred to the delay slots of the epilogue, the
394 delay list for them is recorded here. */
395 rtx epilogue_delay_list;
396
397 /* How commonly executed the function is. Initialized during branch
398 probabilities pass. */
399 enum function_frequency {
400 /* This function most likely won't be executed at all.
401 (set only when profile feedback is available). */
402 FUNCTION_FREQUENCY_UNLIKELY_EXECUTED,
403 /* The default value. */
404 FUNCTION_FREQUENCY_NORMAL,
405 /* Optimize this function hard
406 (set only when profile feedback is available). */
407 FUNCTION_FREQUENCY_HOT
408 } function_frequency;
409
410 /* Maximal number of entities in the single jumptable. Used to estimate
411 final flowgraph size. */
412 int max_jumptable_ents;
413
414 /* Collected bit flags. */
415
416 /* Nonzero if function being compiled needs to be given an address
417 where the value should be stored. */
418 unsigned int returns_struct : 1;
419
420 /* Nonzero if function being compiled needs to
421 return the address of where it has put a structure value. */
422 unsigned int returns_pcc_struct : 1;
423
424 /* Nonzero if the current function returns a pointer type. */
425 unsigned int returns_pointer : 1;
426
427 /* Nonzero if function being compiled needs to be passed a static chain. */
428 unsigned int needs_context : 1;
429
430 /* Nonzero if function being compiled can call setjmp. */
431 unsigned int calls_setjmp : 1;
432
433 /* Nonzero if function being compiled can call longjmp. */
434 unsigned int calls_longjmp : 1;
435
436 /* Nonzero if function being compiled can call alloca,
437 either as a subroutine or builtin. */
438 unsigned int calls_alloca : 1;
439
440 /* Nonzero if the function calls __builtin_eh_return. */
441 unsigned int calls_eh_return : 1;
442
443 /* Nonzero if the function calls __builtin_constant_p. */
444 unsigned int calls_constant_p : 1;
445
446 /* Nonzero if function being compiled receives nonlocal gotos
447 from nested functions. */
448 unsigned int has_nonlocal_label : 1;
449
450 /* Nonzero if function being compiled has nonlocal gotos to parent
451 function. */
452 unsigned int has_nonlocal_goto : 1;
453
454 /* Nonzero if function being compiled contains nested functions. */
455 unsigned int contains_functions : 1;
456
457 /* Nonzero if the function being compiled issues a computed jump. */
458 unsigned int has_computed_jump : 1;
459
460 /* Nonzero if the current function is a thunk, i.e., a lightweight
461 function implemented by the output_mi_thunk hook) that just
462 adjusts one of its arguments and forwards to another
463 function. */
464 unsigned int is_thunk : 1;
465
466 /* This bit is used by the exception handling logic. It is set if all
467 calls (if any) are sibling calls. Such functions do not have to
468 have EH tables generated, as they cannot throw. A call to such a
469 function, however, should be treated as throwing if any of its callees
470 can throw. */
471 unsigned int all_throwers_are_sibcalls : 1;
472
473 /* Nonzero if instrumentation calls for function entry and exit should be
474 generated. */
475 unsigned int instrument_entry_exit : 1;
476
477 /* Nonzero if profiling code should be generated. */
478 unsigned int profile : 1;
479
480 /* Nonzero if stack limit checking should be enabled in the current
481 function. */
482 unsigned int limit_stack : 1;
483
484 /* Nonzero if current function uses stdarg.h or equivalent. */
485 unsigned int stdarg : 1;
486
487 /* Nonzero if this function is being processed in function-at-a-time
488 mode. In other words, if all tree structure for this function,
489 including the BLOCK tree, is created before RTL generation
490 commences. */
491 unsigned int x_whole_function_mode_p : 1;
492
493 /* Nonzero if the back-end should not keep track of expressions that
494 determine the size of variable-sized objects. Normally, such
495 expressions are saved away, and then expanded when the next
496 function is started. For example, if a parameter has a
497 variable-sized type, then the size of the parameter is computed
498 when the function body is entered. However, some front-ends do
499 not desire this behavior. */
500 unsigned int x_dont_save_pending_sizes_p : 1;
501
502 /* Nonzero if the current function uses the constant pool. */
503 unsigned int uses_const_pool : 1;
504
505 /* Nonzero if the current function uses pic_offset_table_rtx. */
506 unsigned int uses_pic_offset_table : 1;
507
508 /* Nonzero if the current function needs an lsda for exception handling. */
509 unsigned int uses_eh_lsda : 1;
510
511 /* Nonzero if code to initialize arg_pointer_save_area has been emitted. */
512 unsigned int arg_pointer_save_area_init : 1;
513
514 /* Flag for use by ther rtl inliner, to tell if the function has been
515 processed at least once. */
516 unsigned int rtl_inline_init : 1;
517
518 /* Nonzero if the rtl inliner has saved the function for inlining. */
519 unsigned int saved_for_inline : 1;
520 };
其结构中前5个成员是非常重要的部分。其中,eh_status控制了函数的异常处理;stmt_status记录了语句对栈、数据流的累积效应,这些效应将影响当前语句的翻译;expr_status则记录了函数被调用及退出的行为;emit_status控制相应的rtx形式指令的生成;varasm_status提供了记录函数中常量的方法。
6434 void
6435 allocate_struct_function (tree fndecl) in function.c
6436 {
6437 tree result;
6438
6439 cfun = ggc_alloc_cleared (sizeof (struct function));
6440
6441 max_parm_reg = LAST_VIRTUAL_REGISTER + 1;
6442
6443 cfun->stack_alignment_needed = STACK_BOUNDARY;
6444 cfun->preferred_stack_boundary = STACK_BOUNDARY;
6445
6446 current_function_funcdef_no = funcdef_no++;
6447
6448 cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
6449
6450 init_stmt_for_function ();
6451 init_eh_for_function ();
6452
6453 (*lang_hooks.function.init) (cfun);
6454 if (init_machine_status)
6455 cfun->machine = (*init_machine_status) ();
6456
6457 if (fndecl == NULL)
6458 return;
…
6477 }
在上面,current_function_funcdef_no,current_function_returns_pcc_struct,current_function_returns_struct,current_function_returns_pointer,max_parm_reg,current_function_needs_context都是用于从function结构体中选择特定成员的宏。例如:
558 #define current_function_funcdef_no (cfun->funcdef_no) in function.h
567 #define max_parm_reg (cfun->x_max_parm_reg)
在6441行,对于x86机器,LAST_VIRTUAL_REGISTER被定义成FIRST_PSEUDO_REGISTER +4。其值是57,它定义了可被用于rtx形式代码的最大寄存器数目。
在6443及6444行,STACK_BOUNDARY定义了对栈的对齐边界要求。对于x86,这是32(4字节)。
在6446行,funcdef_no为用于概要分析(profiling)及调试等目的的标签赋予唯一的号码。function结构体中相应的域(funcdef_no)则记录这个值。
接下来,在6450及6451行,函数init_stmt_for_function,init_eh_for_function仅是分配了结构体中的stmt及eh域,它们分别用于语句及异常处理。
在6453行,钩子lang_hooks.function.init对于C++语言是cxx_push_function_context。在当前上下文环境中,它仅为function对象分配language_function实例。结构体language_function将保存及恢复那些记录当前函数编译进度的变量。它被用于嵌套函数。
在6454行,对于x86机器,init_machine_status指向ix86_init_machine_status。它为function对象创建了machine_function的实例,这个结构保存了与函数处理相关的机器信息。
在6457行,由于这次调用中fndecl为NULL,函数在此返回。