来到 cgraph_optimize 中调用的最后一个函数,它为所有的函数进行优化,并发布汇编。
1579 static void
1580 cgraph_expand_all_functions (void) in cgraphunit.c
1581 {
1582 struct cgraph_node *node;
1583 struct cgraph_node **order =
1584 xcalloc (cgraph_n_nodes, sizeof (struct cgraph_node *));
1585 int order_pos = 0;
1586 int i;
1587
1588 cgraph_mark_functions_to_output ();
1589
1590 order_pos = cgraph_postorder (order);
1591
1592 for (i = order_pos - 1; i >= 0; i--)
1593 {
1594 node = order[i];
1595 if (node->output)
1596 {
1597 if (!node->reachable)
1598 abort ();
1599 node->output = 0;
1600 cgraph_expand_function (node);
1601 }
1602 }
1603 free (order);
1604 }
5.13.5.3.2.4.1. 标记出需要发布汇编的函数
首先,找出需要发布汇编的函数。记住内联函数将在被调用处展开,它不作为函数发布。
462 static void
463 cgraph_mark_functions_to_output (void) in cgraphunit.c
464 {
465 struct cgraph_node *node;
466
467 for (node = cgraph_nodes ; node; node = node->next)
468 {
469 tree decl = node->decl;
470 struct cgraph_edge *e;
471
472 if (node->output)
473 abort ();
474
475 for (e = node->callers; e; e = e->next_caller)
476 if (e->inline_failed)
477 break ;
478
479 /* We need to output all local functions that are used and not
480 always inlined, as well as those that are reachable from
481 outside the current compilation unit. */
482 if (DECL_SAVED_TREE (decl)
483 && (node->needed
484 || (e && node->reachable))
485 && !TREE_ASM_WRITTEN (decl) && !node->origin
486 && !DECL_EXTERNAL (decl))
487 node->output = 1;
488 else
489 DECL_SAVED_INSNS (decl) = NULL;
490 }
491 }
489 行的 DECL_SAVED_INSNS 事实上记录着解析该函数时所生成的函数上下文结构。对于不需要单独输出的函数(例如所有调用处都已内联的函数),这个上下文已经不需要了,因此在这里将其清空。
5.13.5.3.2.4.2. 基于 cgraph 展开内联函数调用
5.13.5.3.2.4.2.1. 找出函数体中可以内联的调用
在 cgraph_expand_all_functions 的 1590 行,再次调用 cgraph_postorder 对函数按调用次序排序。随后 1592 行的循环从 order 数组的末尾向前遍历,也就是从位于调用链最末端的函数(被调用者)开始,先处理被调用的函数,再处理其调用者。
526 static void
527 cgraph_expand_function (struct cgraph_node *node) in cgraphunit.c
528 {
529 tree decl = node->decl;
530
531 if (flag_unit_at_a_time )
532 announce_function (decl);
533
534 cgraph_optimize_function (node);
535
536 /* Generate RTL for the body of DECL. Nested functions are expanded
537 via lang_expand_decl_stmt. */
538 (*lang_hooks .callgraph.expand_function) (decl);
539 if (DECL_DEFER_OUTPUT (decl))
540 abort ();
541
542 current_function_decl = NULL;
543 }
在 532 行, announce_function 在终端上输出正在处理的函数名。接着,在 538 行通过语言钩子为函数产生 RTL 代码之前, 534 行先调用下面的函数来执行基于 cgraph 的优化。
495 static void
496 cgraph_optimize_function (struct cgraph_node *node) in cgraphunit.c
497 {
498 tree decl = node->decl;
499
500 timevar_push (TV_INTEGRATION);
501 /* optimize_inline_calls avoids inlining of current_function_decl. */
502 current_function_decl = decl;
503 if (flag_inline_trees )
504 {
505 struct cgraph_edge *e;
506
507 for (e = node->callees; e; e = e->next_callee)
508 if (!e->inline_failed || warn_inline
509 || (DECL_DECLARED_INLINE_P (e->callee->decl)
510 && lookup_attribute ("always_inline",
511 DECL_ATTRIBUTES (e->callee->decl))))
512 break ;
513 if (e)
514 optimize_inline_calls (decl);
515 }
516 if (node->nested)
517 {
518 for (node = node->nested; node; node = node->next_nested)
519 cgraph_optimize_function (node);
520 }
521 timevar_pop (TV_INTEGRATION);
522 }
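上面 507 行的循环检查当前函数的各条调用边:只要存在一个在上面章节中被认为可以内联的调用( inline_failed 为 NULL ),或者 warn_inline 不是 0 ,或者被调用函数声明为 inline 且带有 always_inline 属性,这个函数就要在 514 行经过下面函数的处理。注意 508 行的 warn_inline ,设置这个标志仅表示,如果声明为 inline 的函数未能内联,给出警告并说明原因。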
1617 void
1618 optimize_inline_calls (tree fn) in tree-inline.c
1619 {
1620 inline_data id;
1621 tree prev_fn;
1622
1623 /* There is no point in performing inlining if errors have already
1624 occurred -- and we might crash if we try to inline invalid
1625 code. */
1626 if (errorcount || sorrycount)
1627 return ;
1628
1629 /* Clear out ID. */
1630 memset (&id, 0, sizeof (id));
1631
1632 id.decl = fn;
1633 id.current_decl = fn;
1634 /* Don't allow recursion into FN. */
1635 VARRAY_TREE_INIT (id.fns, 32, "fns");
1636 VARRAY_PUSH_TREE (id.fns, fn);
1637 /* Or any functions that aren't finished yet. */
1638 prev_fn = NULL_TREE;
1639 if (current_function_decl )
1640 {
1641 VARRAY_PUSH_TREE (id.fns, current_function_decl );
1642 prev_fn = current_function_decl ;
1643 }
1644
1645 prev_fn = ((*lang_hooks .tree_inlining.add_pending_fn_decls )
1646 (&id.fns, prev_fn));
1647
1648 /* Create the list of functions this call will inline. */
1649 VARRAY_TREE_INIT (id.inlined_fns, 32, "inlined_fns");
1650
1651 /* Keep track of the low-water mark, i.e., the point where the first
1652 real inlining is represented in ID.FNS. */
1653 id.first_inlined_fn = VARRAY_ACTIVE_SIZE (id.fns);
1654
1655 /* Replace all calls to inline functions with the bodies of those
1656 functions. */
1657 id.tree_pruner = htab_create (37, htab_hash_pointer ,
1658 htab_eq_pointer , NULL);
1659 expand_calls_inline (&DECL_SAVED_TREE (fn), &id);
1660
1661 /* Clean up. */
1662 htab_delete (id.tree_pruner);
1663 if (DECL_LANG_SPECIFIC (fn))
1664 {
1665 tree ifn = make_tree_vec (VARRAY_ACTIVE_SIZE (id.inlined_fns));
1666
1667 if (VARRAY_ACTIVE_SIZE (id.inlined_fns))
1668 memcpy (&TREE_VEC_ELT (ifn, 0), &VARRAY_TREE (id.inlined_fns, 0),
1669 VARRAY_ACTIVE_SIZE (id.inlined_fns) * sizeof (tree));
1670 DECL_INLINED_FNS (fn) = ifn;
1671 }
1672 }
在 1620 行的 inline_data 是用于内联处理的辅助数据结构,其定义如下所示:
74 typedef struct inline_data in tree-inline.c
75 {
76 /* A stack of the functions we are inlining. For example, if we are
77 compiling `f', which calls `g', which calls `h', and we are
78 inlining the body of `h', the stack will contain, `h', followed
79 by `g', followed by `f'. The first few elements of the stack may
80 contain other functions that we know we should not recurse into,
81 even though they are not directly being inlined. */
82 varray_type fns;
83 /* The index of the first element of FNS that really represents an
84 inlined function. */
85 unsigned first_inlined_fn;
86 /* The label to jump to when a return statement is encountered. If
87 this value is NULL, then return statements will simply be
88 remapped as return statements, rather than as jumps. */
89 tree ret_label;
90 /* The map from local declarations in the inlined function to
91 equivalents in the function into which it is being inlined. */
92 splay_tree decl_map;
93 /* Nonzero if we are currently within the cleanup for a
94 TARGET_EXPR. */
95 int in_target_cleanup_p;
96 /* A list of the functions current function has inlined. */
97 varray_type inlined_fns;
98 /* We use the same mechanism to build clones that we do to perform
99 inlining. However, there are a few places where we need to
100 distinguish between those two situations. This flag is true if
101 we are cloning, rather than inlining. */
102 bool cloning_p;
103 /* Hash table used to prevent walk_tree from visiting the same node
104 umpteen million times. */
105 htab_t tree_pruner;
106 /* Decl of function we are inlining into. */
107 tree decl;
108 tree current_decl;
109 } inline_data;
注意上面对成员 fns 的注释,这也是先要用 cgraph_postorder 排序的原因。另外,由于 optimize_inline_calls 有可能在解析函数体时被调用(在不进行优化, flag_unit_at_a_time 为 0 时),在上面的 1645 行,钩子 add_pending_fn_decls 向 fns 加入这些正在解析的函数,并返回最顶层正在解析的函数。
在使用优化的情形下,这个函数应该不做任何事。
2107 tree
2108 cp_add_pending_fn_decls (void* fns_p, tree prev_fn) in tree.c
2109 {
2110 varray_type *fnsp = (varray_type *)fns_p;
2111 struct saved_scope *s;
2112
2113 for (s = scope_chain ; s; s = s->prev)
2114 if (s->function_decl && s->function_decl != prev_fn)
2115 {
2116 VARRAY_PUSH_TREE (*fnsp, s->function_decl);
2117 prev_fn = s->function_decl;
2118 }
2119
2120 return prev_fn;
2121 }
而在下面的调用中,参数 tp 指向这个函数体(即 &DECL_SAVED_TREE (fn) ), id->tree_pruner 则是为了确保每个节点只遍历一次而使用的哈希表。
1603 static void
1604 expand_calls_inline (tree *tp, inline_data *id) in tree-inline.c
1605 {
1606 /* Search through *TP, replacing all calls to inline functions by
1607 appropriate equivalents. Use walk_tree in no-duplicates mode
1608 to avoid exponential time complexity. (We can't just use
1609 walk_tree_without_duplicates, because of the special TARGET_EXPR
1610 handling in expand_calls. The hash table is set up in
1611 optimize_function. */
1612 walk_tree (tp, expand_call_inline , id, id->tree_pruner);
1613 }
记得 walk_tree 进行的是前序( pre-order )遍历,在每个遭遇的节点上调用作为其参数的函数。这里是函数 expand_call_inline 。
1233 static tree
1234 expand_call_inline (tree *tp, int *walk_subtrees, void *data) in tree-inline.c
1235 {
1236 inline_data *id;
1237 tree t;
1238 tree expr;
1239 tree stmt;
1240 #ifndef INLINER_FOR_JAVA
1241 tree chain;
1242 tree scope_stmt;
1243 tree use_stmt;
1244 #else /* INLINER_FOR_JAVA */
1245 tree retvar;
1246 #endif /* INLINER_FOR_JAVA */
1247 tree fn;
1248 tree arg_inits;
1249 tree *inlined_body;
1250 splay_tree st;
1251 tree args;
1252 tree return_slot_addr;
1253 const char *reason;
1254
1255 /* See what we've got. */
1256 id = (inline_data *) data;
1257 t = *tp;
1258
1259 /* Recurse, but letting recursive invocations know that we are
1260 inside the body of a TARGET_EXPR. */
1261 if (TREE_CODE (*tp) == TARGET_EXPR)
1262 {
1263 #ifndef INLINER_FOR_JAVA
1264 int i, len = first_rtl_op (TARGET_EXPR);
1265
1266 /* We're walking our own subtrees. */
1267 *walk_subtrees = 0;
1268
1269 /* Actually walk over them. This loop is the body of
1270 walk_trees, omitting the case where the TARGET_EXPR
1271 itself is handled. */
1272 for (i = 0; i < len; ++i)
1273 {
1274 if (i == 2)
1275 ++id->in_target_cleanup_p;
1276 walk_tree (&TREE_OPERAND (*tp, i), expand_call_inline, data,
1277 id->tree_pruner);
1278 if (i == 2)
1279 --id->in_target_cleanup_p;
1280 }
1281
1282 return NULL_TREE;
1283 #else /* INLINER_FOR_JAVA */
1284 abort ();
1285 #endif /* INLINER_FOR_JAVA */
1286 }
1287 else if (TREE_CODE (t) == EXPR_WITH_FILE_LOCATION)
1288 {
1289 /* We're walking the subtree directly. */
1290 *walk_subtrees = 0;
1291 /* Update the source position. */
1292 push_srcloc (EXPR_WFL_FILENAME (t), EXPR_WFL_LINENO (t));
1293 walk_tree (&EXPR_WFL_NODE (t), expand_call_inline, data,
1294 id->tree_pruner);
1295 /* Restore the original source position. */
1296 pop_srcloc ();
1297
1298 return NULL_TREE;
1299 }
1300
1301 if (TYPE_P (t))
1302 /* Because types were not copied in copy_body, CALL_EXPRs beneath
1303 them should not be expanded. This can happen if the type is a
1304 dynamic array type, for example. */
1305 *walk_subtrees = 0;
1306
1307 /* From here on, we're only interested in CALL_EXPRs. */
1308 if (TREE_CODE (t) != CALL_EXPR)
1309 return NULL_TREE;
原则上 expand_call_inline 只对 CALL_EXPR 节点感兴趣。另外,由于 TARGET_EXPR 及 EXPR_WITH_FILE_LOCATION (可能)都封装了 CALL_EXPR ,也需要对它们做一些特定的处理。其中 EXPR_WITH_FILE_LOCATION 是封装了源位置信息的节点, EXPR_WFL_NODE 给出所包含的表达式。
expand_call_inline (continue)
1311 /* First, see if we can figure out what function is being called.
1312 If we cannot, then there is no hope of inlining the function. */
1313 fn = get_callee_fndecl (t);
1314 if (!fn)
1315 return NULL_TREE;
1316
1317 /* Turn forward declarations into real ones. */
1318 fn = cgraph_node (fn)->decl;
1319
1320 /* If fn is a declaration of a function in a nested scope that was
1321 globally declared inline, we don't set its DECL_INITIAL.
1322 However, we can't blindly follow DECL_ABSTRACT_ORIGIN because the
1323 C++ front-end uses it for cdtors to refer to their internal
1324 declarations, that are not real functions. Fortunately those
1325 don't have trees to be saved, so we can tell by checking their
1326 DECL_SAVED_TREE. */
1327 if (! DECL_INITIAL (fn)
1328 && DECL_ABSTRACT_ORIGIN (fn)
1329 && DECL_SAVED_TREE (DECL_ABSTRACT_ORIGIN (fn)))
1330 fn = DECL_ABSTRACT_ORIGIN (fn);
1331
1332 /* Don't try to inline functions that are not well-suited to
1333 inlining. */
1334 if (!cgraph_inline_p (id->current_decl, fn, &reason))
1335 {
1336 if (lookup_attribute ("always_inline", DECL_ATTRIBUTES (fn)))
1337 {
1338 sorry ("%Jinlining failed in call to '%F': %s", fn, fn, reason);
1339 sorry ("called from here");
1340 }
1341 else if (warn_inline && DECL_DECLARED_INLINE_P (fn)
1342 && !DECL_IN_SYSTEM_HEADER (fn)
1343 && strlen (reason)
1344 && !lookup_attribute ("noinline", DECL_ATTRIBUTES (fn)))
1345 {
1346 warning ("%Jinlining failed in call to '%F': %s", fn, fn, reason);
1347 warning ("called from here");
1348 }
1349 return NULL_TREE;
1350 }
1351
1352 if (! (*lang_hooks .tree_inlining.start_inlining) (fn))
1353 return NULL_TREE;
1354
1355 /* Set the current filename and line number to the function we are
1356 inlining so that when we create new _STMT nodes here they get
1357 line numbers corresponding to the function we are calling. We
1358 wrap the whole inlined body in an EXPR_WITH_FILE_AND_LINE as well
1359 because individual statements don't record the filename. */
1360 push_srcloc (DECL_SOURCE_FILE (fn), DECL_SOURCE_LINE (fn));
1361
1362 #ifndef INLINER_FOR_JAVA
1363 /* Build a statement-expression containing code to initialize the
1364 arguments, the actual inline expansion of the body, and a label
1365 for the return statements within the function to jump to. The
1366 type of the statement expression is the return type of the
1367 function call. */
1368 expr = build1 (STMT_EXPR, TREE_TYPE (TREE_TYPE (fn)), make_node (COMPOUND_STMT));
1369 /* There is no scope associated with the statement-expression. */
1370 STMT_EXPR_NO_SCOPE (expr) = 1;
1371 if (lookup_attribute ("warn_unused_result",
1372 TYPE_ATTRIBUTES (TREE_TYPE (fn))))
1373 STMT_EXPR_WARN_UNUSED_RESULT (expr) = 1;
1374 stmt = STMT_EXPR_STMT (expr);
来到这里,经过前面的分析处理,能内联的调用对应的 cgraph_edge 的 inline_failed 域是 NULL ,否则它保存的是不能内联原因的描述。因此,判断每个调用能否内联变得相当简单。
1490 bool
1491 cgraph_inline_p (tree caller_decl, tree callee_decl, const char **reason) in cgraphunit.c
1492 {
1493 struct cgraph_node *caller = cgraph_node (caller_decl);
1494 struct cgraph_node *callee = cgraph_node (callee_decl);
1495 struct cgraph_edge *e;
1496
1497 for (e = caller->callees; e; e = e->next_callee)
1498 if (e->callee == callee)
1499 {
1500 if (e->inline_failed && reason)
1501 *reason = e->inline_failed;
1502 return !e->inline_failed;
1503 }
1504 /* We do not record builtins in the callgraph. Perhaps it would make more
1505 sense to do so and then prune out those not overwritten by explicit
1506 function body. */
1507 if (reason)
1508 *reason = "originally indirect function calls never inlined";
1509 return false;
1510 }
在调用内建( builtin )函数时,内建函数不构建入 cgraph 图中,因此在 1497 行的循环中找不到对应的边,函数在 1509 行返回 false ,它们亦不可以内联。至于上面 1352 行的 start_inlining 钩子,当前版本的 GCC 只定义了默认的、只返回 true 的钩子函数。 1368 行构建了容纳内联函数展开代码的复合语句块,可以看到其类型为函数的返回类型。