5.13.5.3.2.2.3. 普通的内联函数
处理完了强制内联函数(使用“ always_inline ”)后,剩下的函数由编译器便宜行事。只要我们不通过编译选项 -fno-inline 明确禁止函数内联, 1326 行的 flag_really_no_inline 就是 0 。
cgraph_decide_inline (continue)
1320 #ifdef ENABLE_CHECKING
1321 for (node = cgraph_nodes ; node; node = node->next)
1322 if (node->aux || node->output)
1323 abort ();
1324 #endif
1325
1326 if (!flag_really_no_inline )
1327 {
1328 cgraph_decide_inlining_of_small_functions (inlined, inlined_callees);
1329 #ifdef ENABLE_CHECKING
1330 for (node = cgraph_nodes ; node; node = node->next)
1331 if (node->aux || node->output)
1332 abort ();
1333 #endif
1334
1335 if (cgraph_dump_file )
1336 fprintf (cgraph_dump_file , "\nDeciding on functions called once:\n");
这里,参数 inlined 及 inlined_callees 作为缓存使用,其在前面所设置的内容无关紧要,将在后面被改写。下面的分析中使用了 Fibonacci heaps ,根据内联展开所带来的指令数增长量(注意不是行数)对候选函数进行排序。关于 Fibonacci heaps 的细节这里不深究。
1100 static void
1101 cgraph_decide_inlining_of_small_functions (struct cgraph_node **inlined, in cgraphunit.c
1102 struct cgraph_node **inlined_callees)
1103 {
1104 int i;
1105 struct cgraph_node *node;
1106 fibheap_t heap = fibheap_new ();
1107 struct fibnode **heap_node =
1108 xcalloc (cgraph_max_uid , sizeof (struct fibnode *));
1109 int ninlined, ninlined_callees;
1110 int max_insns = ((HOST_WIDEST_INT) initial_insns
1111 * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
1112
1113 /* Put all inline candidates into the heap. */
1114
1115 for (node = cgraph_nodes ; node; node = node->next)
1116 {
1117 if (!node->local.inlinable || !node->callers
1118 || node->local.disregard_inline_limits)
1119 continue ;
1120
1121 if (!cgraph_default_inline_p (node))
1122 {
1123 cgraph_set_inline_failed (node,
1124 N_("--param max-inline-insns-single limit reached"));
1125 continue ;
1126 }
1127 heap_node[node->uid] =
1128 fibheap_insert (heap, cgraph_estimate_growth (node), node);
1129 }
因为现在是编译器做主,那么函数能否内联,除了它本身适合内联外,还取决于其展开大小。对于声明为“ inline ”的函数,这个限制是 MAX_INLINE_INSNS_SINGLE (默认为 500 ),对于没有声明为“ inline ”的函数,其限制则是 MAX_INLINE_INSNS_AUTO (默认为 100 )。注意,单位是指令,不是行数。
1068 static bool
1069 cgraph_default_inline_p (struct cgraph_node *n) in cgraphunit.c
1070 {
1071 if (!DECL_INLINE (n->decl) || !DECL_SAVED_TREE (n->decl))
1072 return false;
1073 if (DECL_DECLARED_INLINE_P (n->decl))
1074 return n->global.insns < MAX_INLINE_INSNS_SINGLE;
1075 else
1076 return n->global.insns < MAX_INLINE_INSNS_AUTO;
1077 }
对于不能内联的函数,同时要把失败的原因记录到它的每一条调用边( node->callers )上,以此告知其调用者;已经被内联的调用边(例如前面已被强制内联的,彼时其 inline_failed 是 NULL )则保持不变。
1081 static void
1082 cgraph_set_inline_failed (struct cgraph_node *node, const char *reason) in cgraphunit.c
1083 {
1084 struct cgraph_edge *e;
1085
1086 if (cgraph_dump_file )
1087 fprintf (cgraph_dump_file , "Inlining failed: %s\n", reason);
1088 for (e = node->callers; e; e = e->next_caller)
1089 if (e->inline_failed)
1090 e->inline_failed = reason;
1091 }
一旦函数的大小在限制之内,就把它加入 Fibonacci heaps ,不过作为键值使用的,则是展开它所带来的指令数的增加量。
918 static int
919 cgraph_estimate_growth (struct cgraph_node *node) in cgraphunit.c
920 {
921 int growth = 0;
922 int calls_saved = 0;
923 int clones_added = 0;
924 struct cgraph_edge *e;
925
926 for (e = node->callers; e; e = e->next_caller)
927 if (e->inline_failed)
928 {
929 growth += ((cgraph_estimate_size_after_inlining (1, e->caller, node)
930 -
931 e->caller->global.insns) *e->caller->global.cloned_times);
932 calls_saved += e->caller->global.cloned_times;
933 clones_added += e->caller->global.cloned_times;
934 }
935
936 /* ??? Wrong for self recursive functions or cases where we decide to not
937 inline for different reasons, but it is not big deal as in that case
938 we will keep the body around, but we will also avoid some inlining. */
939 if (!node->needed && !node->origin && !DECL_EXTERNAL (node->decl))
940 growth -= node->global.insns, clones_added--;
941
942 if (!calls_saved)
943 calls_saved = 1;
944
945 return growth;
946 }
不过因为现在还没有对调用者、被调用者进行分析,这是个比较粗略的估算(没有考虑该函数所直接、间接调用函数的大小)。随着下面对函数分析的深入,这个估算会趋向准确。也正是因为下面将频繁改写节点的键值,出于效率考虑,采用了 Fibonacci heaps 。
在 1133 行, fibheap_extract_min 从 Fibonacci heaps 中移出键值最小的节点。在一开始,这个键值仅是该函数本身的指令数乘以被调用的次数。显然,从键值最小的函数开始处理,是很好的开始,它最有可能是在调用栈最底层的函数。
cgraph_decide_inlining_of_small_functions (continue)
1131 if (cgraph_dump_file )
1132 fprintf (cgraph_dump_file , "\nDeciding on smaller functions:\n");
1133 while (overall_insns <= max_insns && (node = fibheap_extract_min (heap)))
1134 {
1135 struct cgraph_edge *e;
1136 int old_insns = overall_insns ;
1137
1138 heap_node[node->uid] = NULL;
1139 if (cgraph_dump_file )
1140 fprintf (cgraph_dump_file ,
1141 "\nConsidering %s with %i insns\n"
1142 " Estimated growth is %+i insns.\n",
1143 cgraph_node_name (node), node->global.insns,
1144 cgraph_estimate_growth (node));
1145 if (!cgraph_default_inline_p (node))
1146 {
1147 cgraph_set_inline_failed (node,
1148 N_("--param max-inline-insns-single limit reached after inlining into the callee"));
1149 continue ;
1150 }
1151 ninlined_callees = cgraph_inlined_callees (node, inlined_callees);
1152 for (e = node->callers; e; e = e->next_caller)
1153 if (e->inline_failed)
1154 {
1155 /* Marking recursive function inlinine has sane semantic and
1156 thus we should not warn on it. */
1157 if (e->caller == node)
1158 {
1159 e->inline_failed = "";
1160 continue ;
1161 }
1162 ninlined = cgraph_inlined_into (e->caller, inlined);
1163 if (e->callee->output)
1164 e->inline_failed = "";
1165 if (e->callee->output
1166 || !cgraph_check_inline_limits (e->caller, node, inlined,
1167 ninlined, &e->inline_failed))
1168 {
1169 for (i = 0; i < ninlined; i++)
1170 inlined[i]->output = 0, inlined[i]->aux = 0;
1171 if (cgraph_dump_file )
1172 fprintf (cgraph_dump_file , " Not inlining into %s.\n",
1173 cgraph_node_name (e->caller));
1174 continue ;
1175 }
1176 cgraph_mark_inline (e->caller, node, inlined, ninlined,
1177 inlined_callees, ninlined_callees);
1178 if (heap_node[e->caller->uid])
1179 fibheap_replace_key (heap, heap_node[e->caller->uid],
1180 cgraph_estimate_growth (e->caller));
1181
1182 /* Size of the functions we updated into has changed, so update
1183 the keys. */
1184 for (i = 0; i < ninlined; i++)
1185 {
1186 inlined[i]->output = 0, inlined[i]->aux = 0;
1187 if (heap_node[inlined[i]->uid])
1188 fibheap_replace_key (heap, heap_node[inlined[i]->uid],
1189 cgraph_estimate_growth (inlined[i]));
1190 }
1191 if (cgraph_dump_file )
1192 fprintf (cgraph_dump_file ,
1193 " Inlined into %s which now has %i insns.\n",
1194 cgraph_node_name (e->caller),
1195 e->caller->global.insns);
1196 }
1197
1198 /* Similarly all functions called by the function we just inlined
1199 are now called more times; update keys. */
1200
1201 for (e = node->callees; e; e = e->next_callee)
1202 if (e->inline_failed && heap_node[e->callee->uid])
1203 fibheap_replace_key (heap, heap_node[e->callee->uid],
1204 cgraph_estimate_growth (e->callee));
1205
1206 for (i = 0; i < ninlined_callees; i++)
1207 {
1208 struct cgraph_edge *e;
1209
1210 for (e = inlined_callees[i]->callees; e; e = e->next_callee)
1211 if (e->inline_failed && heap_node[e->callee->uid])
1212 fibheap_replace_key (heap, heap_node[e->callee->uid],
1213 cgraph_estimate_growth (e->callee));
1214
1215 inlined_callees[i]->output = 0;
1216 inlined_callees[i]->aux = 0;
1217 }
1218 if (cgraph_dump_file )
1219 fprintf (cgraph_dump_file ,
1220 " Inlined %i times for a net change of %+i insns.\n",
1221 node->global.cloned_times, overall_insns - old_insns);
1222 }
1223 while ((node = fibheap_extract_min (heap)) != NULL)
1224 if (!node->local.disregard_inline_limits)
1225 cgraph_set_inline_failed (node, N_("--param inline-unit-growth limit reached"));
1226 fibheap_delete (heap);
1227 free (heap_node);
1228 }
除了给估算展开大小比较小的函数优先处理外,编译器对内联函数基本上一视同仁,一旦展开到了一定程度,就不再允许内联了。这个规则,一方面是总体指令数的增加率,它由 1133 行的 max_insns 控制(默认情况下,编译器允许指令数 50% 的增长);另一方面则体现在 cgraph_check_inline_limits 中。
1018 static bool
1019 cgraph_check_inline_limits (struct cgraph_node *to, struct cgraph_node *what,
1020 struct cgraph_node **inlined, int ninlined,
1021 const char **reason)
1022 {
1023 int i;
1024 int times = 0;
1025 struct cgraph_edge *e;
1026 int newsize;
1027 int limit;
1028
1029 for (e = to->callees; e; e = e->next_callee)
1030 if (e->callee == what)
1031 times++;
1032
1033 /* When inlining large function body called once into small function,
1034 take the inlined function as base for limiting the growth. */
1035 if (to->local.self_insns > what->local.self_insns)
1036 limit = to->local.self_insns;
1037 else
1038 limit = what->local.self_insns;
1039
1040 limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;
1041
1042 newsize = cgraph_estimate_size_after_inlining (times, to, what);
1043 if (newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
1044 && newsize > limit)
1045 {
1046 *reason = N_("--param large-function-growth limit reached");
1047 return false;
1048 }
1049 for (i = 0; i < ninlined; i++)
1050 {
1051 newsize =
1052 cgraph_estimate_size_after_inlining (INLINED_TIMES (inlined[i]) *
1053 times, inlined[i], what);
1054 if (newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
1055 && newsize >
1056 inlined[i]->local.self_insns *
1057 (100 + PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH)) / 100)
1058 {
1059 *reason = N_("--param large-function-growth limit reached while inlining the caller");
1060 return false;
1061 }
1062 }
1063 return true;
1064 }
PARAM_LARGE_FUNCTION_GROWTH 用于控制因为内联了大函数而导致该部分指令数增长的百分率(默认是 100% ),显然如果该大函数被内联超过 1 次就会超标。而 PARAM_LARGE_FUNCTION_INSNS 则是一个值,当函数的估算指令数超过这个值,就被认为是大函数。
在 1163 行的 output ,在目前的情形下,它是由 1151 行的 cgraph_inlined_callees 设置的。因为一开始 output 都是 0 ,而且在 1152 行的循环中,每次处理完都会重置相关节点的 output 域( 1170 及 1186 行),因此如果现在发现被调用函数的 output 已经设置了,显然还有别的函数内联了该函数,那么当前函数就不能被其调用者内联了(但依然内联这个被调用函数),以防止这个被调用函数的展开次数出现指数级增长。
一旦当前函数通过了 1165 及 1166 行的检查,就认为可以内联了,通过 cgraph_mark_inline 来更新相应的参数。作为结果,其调用者展开它的代价也就变了,所以还需要更新 Heaps 中节点(如果还在的话)。
在 cgraph_decide_inlining_of_small_functions 的最后,如果指令增长率超过了预设,那么剩下的函数一概不许内联。因为 heaps 以展开指令数排序,可以预见剩下的都是比较大的函数。
cgraph_decide_inline (continue)
1338 /* And finally decide what functions are called once. */
1339
1340 for (i = nnodes - 1; i >= 0; i--)
1341 {
1342 node = order[i];
1343
1344 if (node->callers && !node->callers->next_caller && !node->needed
1345 && node->local.inlinable && node->callers->inline_failed
1346 && !DECL_EXTERNAL (node->decl) && !DECL_COMDAT (node->decl))
1347 {
1348 bool ok = true;
1349 struct cgraph_node *node1;
1350
1351 /* Verify that we won't duplicate the caller. */
1352 for (node1 = node->callers->caller;
1353 node1->callers && !node1->callers->inline_failed
1354 && ok; node1 = node1->callers->caller)
1355 if (node1->callers->next_caller || node1->needed)
1356 ok = false;
1357 if (ok)
1358 {
1359 const char *dummy_reason;
1360 if (cgraph_dump_file )
1361 fprintf (cgraph_dump_file ,
1362 "\nConsidering %s %i insns.\n"
1363 " Called once from %s %i insns.\n",
1364 cgraph_node_name (node), node->global.insns,
1365 cgraph_node_name (node->callers->caller),
1366 node->callers->caller->global.insns);
1367 ninlined = cgraph_inlined_into (node->callers->caller,
1368 inlined);
1369 old_insns = overall_insns ;
1370
1371 /* Inlining functions once would never cause inlining warnings. */
1372 if (cgraph_check_inline_limits
1373 (node->callers->caller, node, inlined, ninlined,
1374 &dummy_reason))
1375 {
1376 ninlined_callees =
1377 cgraph_inlined_callees (node, inlined_callees);
1378 cgraph_mark_inline (node->callers->caller, node, inlined,
1379 ninlined, inlined_callees,
1380 ninlined_callees);
1381 for (y = 0; y < ninlined_callees; y++)
1382 inlined_callees[y]->output = 0, inlined_callees[y]->aux = 0;
1383 if (cgraph_dump_file )
1384 fprintf (cgraph_dump_file ,
1385 " Inlined into %s which now has %i insns"
1386 " for a net change of %+i insns.\n",
1387 cgraph_node_name (node->callers->caller),
1388 node->callers->caller->global.insns,
1389 overall_insns - old_insns);
1390 }
1391 else
1392 {
1393 if (cgraph_dump_file )
1394 fprintf (cgraph_dump_file ,
1395 " Inline limit reached, not inlined.\n");
1396 }
1397 for (y = 0; y < ninlined; y++)
1398 inlined[y]->output = 0, inlined[y]->aux = 0;
1399 }
1400 }
1401 }
1402 }
1403 cgraph_remove_unreachable_nodes ();
1404
1405 if (cgraph_dump_file )
1406 fprintf (cgraph_dump_file ,
1407 "\nInlined %i calls, eliminated %i functions, "
1408 "%i insns turned to %i insns.\n\n",
1409 ncalls_inlined , nfunctions_inlined , initial_insns ,
1410 overall_insns );
1411 free (order);
1412 free (inlined);
1413 free (inlined_callees);
1414 }
回到 cgraph_decide_inline ,对于只被调用一次,而又在上面被冤杀的小函数,编译器还是要网开一面,毕竟内联这些小函数的收益还是比较大。在 1403 行,还要通过 cgraph_remove_unreachable_nodes 进行可达性分析,移除不再需要的函数。
回到 cgraph_optimize ,在上面的处理中 cgraph_node 节点的 global 部分已经得到设置,在 1596 行,设置 cgraph_global_info_ready 来显示这一事实。