从 cgraph_finalize_compilation_unit 返回到 finish_file,这时这个编译单元中的全局变量及用到的静态变量都已经被输出为汇编代码,用到的函数也已经构建入 cgraph 图中,其中被调用者、调用者的关系一目了然。接下来调用下面的 cgraph_optimize,根据这个 cgraph 图进行优化,并输出函数的汇编代码。
/* Run the call-graph based optimizations and then output the functions.
   Does nothing unless flag_unit_at_a_time is set.  In order, it calls
   cgraph_mark_local_functions, cgraph_decide_inlining, sets
   cgraph_global_info_ready, and finally calls cgraph_expand_all_functions.
   Progress messages go to stderr unless quiet_flag is set; when
   cgraph_dump_file is open the call graph is dumped after each step.  */
1579 void
1580 cgraph_optimize (void) in cgraphunit.c
1581 {
1582   if (!flag_unit_at_a_time)
1583     return;
1584   timevar_push (TV_CGRAPHOPT);  /* Popped at 1602, before expansion starts.  */
1585   if (!quiet_flag)
1586     fprintf (stderr, "Performing intraprocedural optimizations\n");
1587
1588   cgraph_mark_local_functions ();
1589   if (cgraph_dump_file)
1590     {
1591       fprintf (cgraph_dump_file, "Marked ");
1592       dump_cgraph (cgraph_dump_file);
1593     }
1594
1595   cgraph_decide_inlining ();
1596   cgraph_global_info_ready = true;  /* Set only once inlining has been decided.  */
1597   if (cgraph_dump_file)
1598     {
1599       fprintf (cgraph_dump_file, "Optimized ");
1600       dump_cgraph (cgraph_dump_file);
1601     }
1602   timevar_pop (TV_CGRAPHOPT);
1603
1604   /* Output everything.  */
1605   if (!quiet_flag)
1606     fprintf (stderr, "Assembling functions:\n");
1607   cgraph_expand_all_functions ();
1608   if (cgraph_dump_file)
1609     {
1610       fprintf (cgraph_dump_file, "\nFinal ");
1611       dump_cgraph (cgraph_dump_file);
1612     }
1613 }
可以看到,这是一个强度颇高的优化,只有在优化开关 flag_unit_at_a_time 打开时才进行(否则在 1583 行直接返回)。
首先标记局部函数。这里局部函数的含义是:调用局限在当前编译单元中,并且其所有的调用都是显式的,因而我们可以改变其调用规范( calling convention )。
/* Set node->local.local for every node on the cgraph_nodes list.  A node is
   marked local when it is not needed (!node->needed), DECL_SAVED_TREE of its
   decl is non-null (presumably meaning a function body is available -- TODO
   confirm) and its decl is not TREE_PUBLIC.  When cgraph_dump_file is open,
   the names of the functions marked local are written to it.  */
1556 static void
1557 cgraph_mark_local_functions (void) in cgraphunit.c
1558 {
1559   struct cgraph_node *node;
1560
1561   if (cgraph_dump_file)
1562     fprintf (cgraph_dump_file, "\nMarking local functions:");
1563
1564   /* Figure out functions we want to assemble.  */
1565   for (node = cgraph_nodes; node; node = node->next)
1566     {
1567       node->local.local = (!node->needed
1568                            && DECL_SAVED_TREE (node->decl)
1569                            && !TREE_PUBLIC (node->decl));
1570       if (cgraph_dump_file && node->local.local)
1571         fprintf (cgraph_dump_file, " %s", cgraph_node_name (node));
1572     }
1573   if (cgraph_dump_file)
1574     fprintf (cgraph_dump_file, "\n\n");
1575 }
使 1567 行赋值表达式右边为 1 的函数是:地址没有被引用(node->needed 因而为 0)、具有函数体(DECL_SAVED_TREE 非空)的静态函数(TREE_PUBLIC 为 0)。
前面我们已经为声明为内联的函数进行了分析,现在我们要进一步确定理论上能内联的函数,是否能真正内联。
1233 static void
1234 cgraph_decide_inlining (void) in cgraphunit.c
1235 {
1236 struct cgraph_node *node;
1237 int nnodes;
1238 struct cgraph_node **order =
1239 xcalloc (cgraph_n_nodes , sizeof (struct cgraph_node *));
1240 struct cgraph_node **inlined =
1241 xcalloc (cgraph_n_nodes , sizeof (struct cgraph_node *));
1242 struct cgraph_node **inlined_callees =
1243 xcalloc (cgraph_n_nodes , sizeof (struct cgraph_node *));
1244 int ninlined;
1245 int ninlined_callees;
1246 int old_insns = 0;
1247 int i, y;
1248
1249 for (node = cgraph_nodes ; node; node = node->next)
1250 initial_insns += node->local.self_insns;
1251 overall_insns = initial_insns ;
1252
1253 nnodes = cgraph_postorder (order);
1254
1255 if (cgraph_dump_file )
1256 fprintf (cgraph_dump_file ,
1257 "/nDeciding on inlining. Starting with %i insns./n",
1258 initial_insns);
1259
1260 for (node = cgraph_nodes ; node; node = node->next)
1261 node->aux = 0;
1262
1263 if (cgraph_dump_file )
1264 fprintf (cgraph_dump_file , "/nInlining always_inline functions:/n");
1265 #ifdef ENABLE_CHECKING
1266 for (node = cgraph_nodes ; node; node = node->next)
1267 if (node->aux || node->output)
1268 abort ();
1269 #endif
前面看到 cgraph_node 的 local.self_insns 保存了该函数(不限于内联函数)估算的指令数,因此 1249~1250 行的循环把所有函数的估算值累加到 initial_insns 中,到 1251 行,overall_insns 及 initial_insns 都得到了该编译单元的总指令数。