5.13.5.3.2.2.2. Function demands inline
Below, order is the queue of cgraph_node sorted in invocation order. The loop visits the sorted nodes starting from the function at the bottom of the call stack (recall from the previous section that if there is a cycle, the functions forming it are placed at the tail of the queue); that is, processing begins at the last invoked function. Once a function is determined to be not inlinable, its caller is not inlinable either (the same holds for a cycle, which is why the functions forming a cycle are placed at the tail of the sorted queue: then none of their callers can be inlined either).
cgraph_decide_inlining (continued)
1271 /* In the first pass mark all always_inline edges. Do this with a priority
1272 so none of our later choices will make this impossible. */
1273 for (i = nnodes - 1; i >= 0; i--)
1274 {
1275 struct cgraph_edge *e;
1276
1277 node = order[i];
1278
1279 for (e = node->callees; e; e = e->next_callee)
1280 if (e->callee->local.disregard_inline_limits)
1281 break ;
1282 if (!e)
1283 continue ;
1284 if (cgraph_dump_file )
1285 fprintf (cgraph_dump_file ,
1286 "\nConsidering %s %i insns (always inline)\n",
1287 cgraph_node_name (e->callee), e->callee->global.insns);
1288 ninlined = cgraph_inlined_into (order[i], inlined);
1289 for (; e; e = e->next_callee)
1290 {
1291 old_insns = overall_insns ;
1292 if (!e->inline_failed || !e->callee->local.inlinable
1293 || !e->callee->local.disregard_inline_limits)
1294 continue ;
1295 if (e->callee->output || e->callee == node)
1296 {
1297 e->inline_failed = N_("recursive inlining");
1298 continue ;
1299 }
1300 ninlined_callees =
1302 cgraph_inlined_callees (e->callee, inlined_callees);
1303 cgraph_mark_inline (node, e->callee, inlined, ninlined,
1304 inlined_callees, ninlined_callees);
1305 for (y = 0; y < ninlined_callees; y++)
1306 inlined_callees[y]->output = 0, inlined_callees[y]->aux = 0;
1307 if (cgraph_dump_file )
1308 fprintf (cgraph_dump_file ,
1309 " Inlined into %s which now has %i insns.\n",
1310 cgraph_node_name (node->callees->caller),
1311 node->callees->caller->global.insns);
1312 }
1313 if (cgraph_dump_file && node->global.cloned_times > 0)
1314 fprintf (cgraph_dump_file ,
1315 " Inlined %i times for a net change of %+i insns.\n",
1316 node->global.cloned_times, overall_insns - old_insns);
1317 for (y = 0; y < ninlined; y++)
1318 inlined[y]->output = 0, inlined[y]->aux = 0;
1319 }
The loop at line 1279 walks the functions directly invoked by the given function, and the condition at line 1280 selects those that forcibly demand inlining (via the attribute “always_inline”). To inline such an “always_inline” call, two sets of data are needed. The first is the set of all direct and indirect callers of the current function (order[i]) whose invocations are already known to be inlined; it is collected by the function below.
681 static int
682 cgraph_inlined_into (struct cgraph_node *node, struct cgraph_node **array) in cgraphunit.c
683 {
684 int nfound = 0;
685 struct cgraph_edge **stack;
686 struct cgraph_edge *e, *e1;
687 int sp;
688 int i;
689
690 /* Fast path: since we traverse in mostly topological order, we will likely
691 find no edges. */
692 for (e = node->callers; e; e = e->next_caller)
693 if (!e->inline_failed)
694 break ;
695
696 if (!e)
697 return 0;
698
699 /* Allocate stack for back-tracking up callgraph. */
700 stack = xmalloc ((cgraph_n_nodes + 1) * sizeof (struct cgraph_edge));
701 sp = 0;
702
703 /* Push the first edge on to the stack. */
704 stack[sp++] = e;
705
706 while (sp)
707 {
708 struct cgraph_node *caller;
709
710 /* Look at the edge on the top of the stack. */
711 e = stack[sp - 1];
712 caller = e->caller;
713
714 /* Check if the caller destination has been visited yet. */
715 if (!caller->output)
716 {
717 array[nfound++] = e->caller;
718 /* Mark that we have visited the destination. */
719 caller->output = true;
720 SET_INLINED_TIMES (caller, 0);
721 }
722 SET_INLINED_TIMES (caller, INLINED_TIMES (caller) + 1);
723
724 for (e1 = caller->callers; e1; e1 = e1->next_caller)
725 if (!e1->inline_failed)
726 break ;
727
728 if (e1)
729 stack[sp++] = e1;
730 else
731 {
732 while (true)
733 {
734 for (e1 = e->next_caller; e1; e1 = e1->next_caller)
735 if (!e1->inline_failed)
736 break ;
737
738 if (e1)
739 {
740 stack[sp - 1] = e1;
741 break ;
742 }
743 else
744 {
745 sp--;
746 if (!sp)
747 break ;
748 e = stack[sp - 1];
749 }
750 }
751 }
752 }
753
754 free (stack);
755
756
757 if (cgraph_dump_file )
758 {
759 fprintf (cgraph_dump_file , " Found inline predecesors of %s:",
760 cgraph_node_name (node));
761 for (i = 0; i < nfound; i++)
762 {
763 fprintf (cgraph_dump_file , " %s", cgraph_node_name (array[i]));
764 if (INLINED_TIMES (array[i]) != 1)
765 fprintf (cgraph_dump_file , " (%i times)",
766 (int)INLINED_TIMES (array[i]));
767 }
768 fprintf (cgraph_dump_file , "\n");
769 }
770
771 return nfound;
772 }
The traversal above is a depth-first one, done with the aid of a stack. The first loop, at line 692, finds the first inlined caller edge (note that in create_edge, the field inline_failed of a fresh cgraph_edge is non-NULL, so a NULL value means this invocation is already known to be inlinable; whether the function as a whole is inlinable depends on whether it can be inlined at all of its invocations). That edge is pushed onto the stack and the iteration begins. The stack grows at line 729 when a further inlinable caller of the caller is found; the top element is overwritten at line 740 when all callers of the current function have been visited and its siblings must be covered next; and the stack shrinks at line 745 when the function together with its siblings is finished, so the walk must go back to the callee and visit its sibling.
Note SET_INLINED_TIMES at line 722: it accumulates the number of times the function would be inlined. Its definition is as below:
607 #define INLINED_TIMES(node) ((size_t)(node)->aux) in cgraphunit.c
608 #define SET_INLINED_TIMES(node,times) ((node)->aux = (void *)(times))
All such direct and indirect callers are collected into the array inlined in cgraph_decide_inlining (line 1288).
Note how cgraph_decide_inlining detects recursion before invoking cgraph_inlined_callees. Given that the cgraph_nodes are in topological order, a recursion must be a direct or indirect path from callee back to caller, which shows up either as encountering an already visited node (“e->callee->output” is true) or as a self invocation (condition “e->callee == node”). Such a call can’t be inlined, so it needs no further handling.
The other set of data is the direct and indirect callees that were previously decided to be inlined. They are recorded in the array inlined_callees in cgraph_decide_inlining (lines 1300–1302).
717 static int
718 cgraph_inlined_callees (struct cgraph_node *node, struct cgraph_node **array) in cgraphunit.c
719 {
720 int nfound = 0;
721 struct cgraph_edge **stack;
722 struct cgraph_edge *e, *e1;
723 int sp;
724 int i;
725
726 /* Fast path: since we traverse in mostly topological order, we will likely
727 find no edges. */
728 for (e = node->callees; e; e = e->next_callee)
729 if (!e->inline_failed)
730 break ;
731
732 if (!e)
733 return 0;
734
735 /* Allocate stack for back-tracking up callgraph. */
736 stack = xmalloc ((cgraph_n_nodes + 1) * sizeof (struct cgraph_edge));
737 sp = 0;
738
739 /* Push the first edge on to the stack. */
740 stack[sp++] = e;
741
742 while (sp)
743 {
744 struct cgraph_node *callee;
745
746 /* Look at the edge on the top of the stack. */
747 e = stack[sp - 1];
748 callee = e->callee;
749
750 /* Check if the callee destination has been visited yet. */
751 if (!callee->output)
752 {
753 array[nfound++] = e->callee;
754 /* Mark that we have visited the destination. */
755 callee->output = true;
756 SET_INLINED_TIMES (callee, 0);
757 }
758 SET_INLINED_TIMES (callee, INLINED_TIMES (callee) + 1);
759
760 for (e1 = callee->callees; e1; e1 = e1->next_callee)
761 if (!e1->inline_failed)
762 break ;
763 if (e1)
764 stack[sp++] = e1;
765 else
766 {
767 while (true)
768 {
769 for (e1 = e->next_callee; e1; e1 = e1->next_callee)
770 if (!e1->inline_failed)
771 break ;
772
773 if (e1)
774 {
775 stack[sp - 1] = e1;
776 break ;
777 }
778 else
779 {
780 sp--;
781 if (!sp)
782 break ;
783 e = stack[sp - 1];
784 }
785 }
786 }
787 }
788
789 free (stack);
790
791 if (cgraph_dump_file )
792 {
793 fprintf (cgraph_dump_file , " Found inline successors of %s:",
794 cgraph_node_name (node));
795 for (i = 0; i < nfound; i++)
796 {
797 fprintf (cgraph_dump_file , " %s", cgraph_node_name (array[i]));
798 if (INLINED_TIMES (array[i]) != 1)
799 fprintf (cgraph_dump_file , " (%i times)",
800 (int)INLINED_TIMES (array[i]));
801 }
802 fprintf (cgraph_dump_file , "\n");
803 }
804
805 return nfound;
806 }
This is also a depth-first traversal; the difference is that it follows the callees chain and the next_callee chain.
Pay attention to how cgraph_decide_inlining selects candidates at line 1292: the condition “!e->inline_failed” means the invocation is already known to be inlinable, and the condition “!e->callee->local.inlinable” indicates that the function is not inlinable. For an inlinable function whose invocation has not been decided yet, cgraph_inlined_callees first collects the paths already known to be inlined, and then the function below marks this invocation as inlined and determines whether the function can be inlined.
951 static void
952 cgraph_mark_inline (struct cgraph_node *to, struct cgraph_node *what, in cgraphunit.c
953 struct cgraph_node **inlined, int ninlined,
954 struct cgraph_node **inlined_callees,
955 int ninlined_callees)
956 {
957 int i;
958 int times = 0;
959 int clones = 0;
960 struct cgraph_edge *e;
961 bool called = false;
962 int new_insns;
963
964 what->global.inlined = 1;
965 for (e = what->callers; e; e = e->next_caller)
966 {
967 if (e->caller == to)
968 {
969 if (!e->inline_failed)
970 continue ;
971 e->inline_failed = NULL;
972 times++;
973 clones += e->caller->global.cloned_times;
974 }
975 else if (e->inline_failed)
976 called = true;
977 }
978 if (!times)
979 abort ();
980 ncalls_inlined += times;
981
982 new_insns = cgraph_estimate_size_after_inlining (times, to, what);
983 if (to->global.will_be_output)
984 overall_insns += new_insns - to->global.insns;
985 to->global.insns = new_insns;
986
987 if (!called && !what->needed && !what->origin
988 && flag_unit_at_a_time
989 && !DECL_EXTERNAL (what->decl))
990 {
991 if (!what->global.will_be_output)
992 abort ();
993 clones--;
994 nfunctions_inlined ++;
995 what->global.will_be_output = 0;
996 overall_insns -= what->global.insns;
997 }
998 what->global.cloned_times += clones;
999 for (i = 0; i < ninlined; i++)
1000 {
1001 new_insns =
1002 cgraph_estimate_size_after_inlining (INLINED_TIMES (inlined[i]) *
1003 times, inlined[i], what);
1004 if (inlined[i]->global.will_be_output)
1005 overall_insns += new_insns - inlined[i]->global.insns;
1006 inlined[i]->global.insns = new_insns;
1007 }
1008 for (i = 0; i < ninlined_callees; i++)
1009 {
1010 inlined_callees[i]->global.cloned_times +=
1011 INLINED_TIMES (inlined_callees[i]) * clones;
1012 }
1013 }
Note that every cgraph_node passed as what to the above function has its field “local.inlinable” set, because at the time the function is called, the invocation being processed is known to be inlinable. One cgraph_edge stands for one invocation, so if one function invokes another several times, one cgraph_edge is built per invocation. If every edge from to to what satisfies the condition at line 969 (i.e. all of them were already marked as inlined), times stays 0; this means something is wrong in the compiler, and it aborts at line 979.
Besides, one invocation being inlinable doesn't mean the function as a whole is inlined away. Its out-of-line body can be dropped only when all of its invocations are inlined and the other conditions at line 987 hold (“!what->needed” indicates that its address isn’t referenced, and “!what->origin” that it is not a nested function); otherwise the function is still output, and the remaining invocations stay ordinary function calls.
909 static int
910 cgraph_estimate_size_after_inlining (int times, struct cgraph_node *to, in cgraphunit.c
911 struct cgraph_node *what)
912 {
913 return (what->global.insns - INSNS_PER_CALL) * times + to->global.insns;
914 }
The rest of cgraph_mark_inline updates the estimate of the total instruction count (the calculation of overall_insns), the instruction count of every function in inlined (lines 999–1007), and the clone count of every function in inlined_callees (line 1010). Note that the estimate stored at line 1006 for such a caller doesn’t include the current function; however, processing of that caller has not begun yet, and when it is processed as the current node it will get a more accurate number.