5.13.5.3.2.2.2. 强制内联的函数
下面的 order 就是按拓扑序(调用次序)排列的 cgraph_node 的队列。在这里,对排序节点的访问是从最接近调用栈底部的函数开始(注意在上一节可以看到,如果存在环,构成环的函数会排在最后),这样就可以从最后一个被调用的函数开始考查,一旦这个函数被判定为不能内联,其调用者自然就不能内联(对环亦如是,因此上面在对构成环的函数排序时,把它们放在排序队列的末尾,所有其它调用它们的函数都不能内联)。
cgraph_decide_inlining (continue)
1271 /* In the first pass mark all always_inline edges. Do this with a priority
1272 so none of our later choices will make this impossible. */
1273 for (i = nnodes - 1; i >= 0; i--)
1274 {
1275 struct cgraph_edge *e;
1276
1277 node = order[i];
1278
1279 for (e = node->callees; e; e = e->next_callee)
1280 if (e->callee->local.disregard_inline_limits)
1281 break ;
1282 if (!e)
1283 continue ;
1284 if (cgraph_dump_file )
1285 fprintf (cgraph_dump_file ,
1286 "\nConsidering %s %i insns (always inline)\n",
1287 cgraph_node_name (e->callee), e->callee->global.insns);
1288 ninlined = cgraph_inlined_into (order[i], inlined);
1289 for (; e; e = e->next_callee)
1290 {
1291 old_insns = overall_insns ;
1292 if (!e->inline_failed || !e->callee->local.inlinable
1293 || !e->callee->local.disregard_inline_limits)
1294 continue ;
1295 if (e->callee->output || e->callee == node)
1296 {
1297 e->inline_failed = N_("recursive inlining");
1298 continue ;
1299 }
1300 ninlined_callees =
1302 cgraph_inlined_callees (e->callee, inlined_callees);
1303 cgraph_mark_inline (node, e->callee, inlined, ninlined,
1304 inlined_callees, ninlined_callees);
1305 for (y = 0; y < ninlined_callees; y++)
1306 inlined_callees[y]->output = 0, inlined_callees[y]->aux = 0;
1307 if (cgraph_dump_file )
1308 fprintf (cgraph_dump_file ,
1309 " Inlined into %s which now has %i insns.\n",
1310 cgraph_node_name (node->callees->caller),
1311 node->callees->caller->global.insns);
1312 }
1313 if (cgraph_dump_file && node->global.cloned_times > 0)
1314 fprintf (cgraph_dump_file ,
1315 " Inlined %i times for a net change of %+i insns.\n",
1316 node->global.cloned_times, overall_insns - old_insns);
1317 for (y = 0; y < ninlined; y++)
1318 inlined[y]->output = 0, inlined[y]->aux = 0;
1319 }
1279 行的循环依次访问被指定函数直接调用的函数, 1280 行的条件选出要求不计代价内联的函数(通过属性“ always_inline ”)。以这个“ always_inline ”函数为界,这里要准备两组数据,一组是所有直接、间接调用该函数,并被前面分析为可内联的函数。
681 static int
682 cgraph_inlined_into (struct cgraph_node *node, struct cgraph_node **array) in cgraphunit.c
683 {
684 int nfound = 0;
685 struct cgraph_edge **stack;
686 struct cgraph_edge *e, *e1;
687 int sp;
688 int i;
689
690 /* Fast path: since we traverse in mostly topological order, we will likely
691 find no edges. */
692 for (e = node->callers; e; e = e->next_caller)
693 if (!e->inline_failed)
694 break ;
695
696 if (!e)
697 return 0;
698
699 /* Allocate stack for back-tracking up callgraph. */
700 stack = xmalloc ((cgraph_n_nodes + 1) * sizeof (struct cgraph_edge));
701 sp = 0;
702
703 /* Push the first edge on to the stack. */
704 stack[sp++] = e;
705
706 while (sp)
707 {
708 struct cgraph_node *caller;
709
710 /* Look at the edge on the top of the stack. */
711 e = stack[sp - 1];
712 caller = e->caller;
713
714 /* Check if the caller destination has been visited yet. */
715 if (!caller->output)
716 {
717 array[nfound++] = e->caller;
718 /* Mark that we have visited the destination. */
719 caller->output = true;
720 SET_INLINED_TIMES (caller, 0);
721 }
722 SET_INLINED_TIMES (caller, INLINED_TIMES (caller) + 1);
723
724 for (e1 = caller->callers; e1; e1 = e1->next_caller)
725 if (!e1->inline_failed)
726 break ;
727
728 if (e1)
729 stack[sp++] = e1;
730 else
731 {
732 while (true)
733 {
734 for (e1 = e->next_caller; e1; e1 = e1->next_caller)
735 if (!e1->inline_failed)
736 break ;
737
738 if (e1)
739 {
740 stack[sp - 1] = e1;
741 break ;
742 }
743 else
744 {
745 sp--;
746 if (!sp)
747 break ;
748 e = stack[sp - 1];
749 }
750 }
751 }
752 }
753
754 free (stack);
755
756
757 if (cgraph_dump_file )
758 {
759 fprintf (cgraph_dump_file , " Found inline predecesors of %s:",
760 cgraph_node_name (node));
761 for (i = 0; i < nfound; i++)
762 {
763 fprintf (cgraph_dump_file , " %s", cgraph_node_name (array[i]));
764 if (INLINED_TIMES (array[i]) != 1)
765 fprintf (cgraph_dump_file , " (%i times)",
766 (int)INLINED_TIMES (array[i]));
767 }
768 fprintf (cgraph_dump_file , "\n");
769 }
770
771 return nfound;
772 }
上面执行的是在 stack 辅助下的深度优先的遍历。首先 692 行的循环找出第一个调用指定函数的可内联的函数(注意在 create_edge 中,新创建的 cgraph_edge 的 inline_failed 域都不是空的,如果它是 NULL 则表明该调用已明确能够内联,但函数是否能内联,取决于它是否在所有的调用中可内联)。把这个函数加入 stack 的栈中,开始遍历。 stack 在 729 行增长,这是因为找到了可内联的更上一级的调用者;而在 740 行改写栈顶对象,这是因为这个函数的所有调用者都已考查了,需要进而考查其兄弟函数;在 745 行则减小,因为这一层所有的函数都已经考察了,需要回退到被调用者那一级,考查其兄弟函数。
注意 722 行的 SET_INLINED_TIMES ,它将累计函数被内联的次数,其定义如下:
607 #define INLINED_TIMES(node) ((size_t)(node)->aux) in cgraphunit.c
608 #define SET_INLINED_TIMES(node,times) ((node)->aux = (void *)(times))
这些可内联的直接、间接的调用者被计入 cgraph_decide_inlining 的 inlined 数组中( 1288 行)。
注意在调用 cgraph_inlined_callees 之前, cgraph_decide_inlining 对递归调用的判断( 1295 行)。考虑到 cgraph_node 都已按拓扑序排列,如果出现递归调用,必然有从被调用者到调用者的间接或直接的路径,这就表现为遇到已经得到处理的节点(“ e->callee->output ”为 true ),或对自身的调用(条件“ e->callee == node ”)。这样的函数肯定不能内联,因此不需要往下处理。
而另一组就是该函数直接、间接调用的被前面分析为可内联的函数。它们被计入 cgraph_decide_inlining 的 inlined_callees 数组中( 1300 ~ 1302 行)。
717 static int
718 cgraph_inlined_callees (struct cgraph_node *node, struct cgraph_node **array) in cgraphunit.c
719 {
720 int nfound = 0;
721 struct cgraph_edge **stack;
722 struct cgraph_edge *e, *e1;
723 int sp;
724 int i;
725
726 /* Fast path: since we traverse in mostly topological order, we will likely
727 find no edges. */
728 for (e = node->callees; e; e = e->next_callee)
729 if (!e->inline_failed)
730 break ;
731
732 if (!e)
733 return 0;
734
735 /* Allocate stack for back-tracking up callgraph. */
736 stack = xmalloc ((cgraph_n_nodes + 1) * sizeof (struct cgraph_edge));
737 sp = 0;
738
739 /* Push the first edge on to the stack. */
740 stack[sp++] = e;
741
742 while (sp)
743 {
744 struct cgraph_node *callee;
745
746 /* Look at the edge on the top of the stack. */
747 e = stack[sp - 1];
748 callee = e->callee;
749
750 /* Check if the callee destination has been visited yet. */
751 if (!callee->output)
752 {
753 array[nfound++] = e->callee;
754 /* Mark that we have visited the destination. */
755 callee->output = true;
756 SET_INLINED_TIMES (callee, 0);
757 }
758 SET_INLINED_TIMES (callee, INLINED_TIMES (callee) + 1);
759
760 for (e1 = callee->callees; e1; e1 = e1->next_callee)
761 if (!e1->inline_failed)
762 break ;
763 if (e1)
764 stack[sp++] = e1;
765 else
766 {
767 while (true)
768 {
769 for (e1 = e->next_callee; e1; e1 = e1->next_callee)
770 if (!e1->inline_failed)
771 break ;
772
773 if (e1)
774 {
775 stack[sp - 1] = e1;
776 break ;
777 }
778 else
779 {
780 sp--;
781 if (!sp)
782 break ;
783 e = stack[sp - 1];
784 }
785 }
786 }
787 }
788
789 free (stack);
790
791 if (cgraph_dump_file )
792 {
793 fprintf (cgraph_dump_file , " Found inline successors of %s:",
794 cgraph_node_name (node));
795 for (i = 0; i < nfound; i++)
796 {
797 fprintf (cgraph_dump_file , " %s", cgraph_node_name (array[i]));
798 if (INLINED_TIMES (array[i]) != 1)
799 fprintf (cgraph_dump_file , " (%i times)",
800 (int)INLINED_TIMES (array[i]));
801 }
802 fprintf (cgraph_dump_file , "\n");
803 }
804
805 return nfound;
806 }
这里也是深度优先的遍历,所不同的是,遍历采用的是 callees 链及 next_callee 链。
这里注意在 cgraph_decide_inlining 中对所考查被调用者的选择,在 1292 行,条件“ !e->inline_failed ”,表示该调用已明确可以内联展开,条件“ !e->callee->local.inlinable ”则表示该函数不可内联。对于可内联的函数,但对其调用没有确定能内联的情况,一方面通过 cgraph_inlined_callees 收集已明确可内联展开的路径,一方面通过下面的函数设置该次调用,并判断函数是否能内联。
951 static void
952 cgraph_mark_inline (struct cgraph_node *to, struct cgraph_node *what, in cgraphunit.c
953 struct cgraph_node **inlined, int ninlined,
954 struct cgraph_node **inlined_callees,
955 int ninlined_callees)
956 {
957 int i;
958 int times = 0;
959 int clones = 0;
960 struct cgraph_edge *e;
961 bool called = false;
962 int new_insns;
963
964 what->global.inlined = 1;
965 for (e = what->callers; e; e = e->next_caller)
966 {
967 if (e->caller == to)
968 {
969 if (!e->inline_failed)
970 continue ;
971 e->inline_failed = NULL;
972 times++;
973 clones += e->caller->global.cloned_times;
974 }
975 else if (e->inline_failed)
976 called = true;
977 }
978 if (!times)
979 abort ();
980 ncalls_inlined += times;
981
982 new_insns = cgraph_estimate_size_after_inlining (times, to, what);
983 if (to->global.will_be_output)
984 overall_insns += new_insns - to->global.insns;
985 to->global.insns = new_insns;
986
987 if (!called && !what->needed && !what->origin
988 && flag_unit_at_a_time
989 && !DECL_EXTERNAL (what->decl))
990 {
991 if (!what->global.will_be_output)
992 abort ();
993 clones--;
994 nfunctions_inlined ++;
995 what->global.will_be_output = 0;
996 overall_insns -= what->global.insns;
997 }
998 what->global.cloned_times += clones;
999 for (i = 0; i < ninlined; i++)
1000 {
1001 new_insns =
1002 cgraph_estimate_size_after_inlining (INLINED_TIMES (inlined[i]) *
1003 times, inlined[i], what);
1004 if (inlined[i]->global.will_be_output)
1005 overall_insns += new_insns - inlined[i]->global.insns;
1006 inlined[i]->global.insns = new_insns;
1007 }
1008 for (i = 0; i < ninlined_callees; i++)
1009 {
1010 inlined_callees[i]->global.cloned_times +=
1011 INLINED_TIMES (inlined_callees[i]) * clones;
1012 }
1013 }
注意能进入上面这个函数的 cgraph_node 节点的“ local.inlinable ”都是成立的,因为在调用这个函数时,已明确这次调用是可内联的。注意每个 cgraph_edge 代表一次调用,如果一个函数对另一个函数有多次调用,就会构建多个 cgraph_edge 一一对应。不过,如果 to 对 what 的每一次调用都满足 969 行的条件(即都已经被标记为内联,或者根本不存在这样的调用), times 就保持为 0 ,这意味着编译器出现了问题,它将在 979 行终止编译。
另外,某次调用可内联,并不意味着函数就可以内联展开,必须是所有的调用都可以展开的情形下,函数才可以进行内联。即满足 987 行条件(其中,“ !what->needed ”表示该函数地址没有被引用,“ !what->origin ”表示不是嵌套函数),否则就降级为函数调用。
909 static int
910 cgraph_estimate_size_after_inlining (int times, struct cgraph_node *to, in cgraphunit.c
911 struct cgraph_node *what)
912 {
913 return (what->global.insns - INSNS_PER_CALL) * times + to->global.insns;
914 }
接下来则是更新总行数的估计( overall_insns 的计算),及每个被调用内联函数的总展开行数( 1010 行)。注意 1006 行对上级调用者的行数估算没有计入当前函数,不过上级调用者的处理尚未开始,在作为当前函数处理时,它会得到正确的数值。