GCC-3.4.6源代码学习笔记(178)

5.13.5.3.2.2.3.          普通的内联函数

处理完了强制内联函数(使用“ always_inline ”)后,剩下的函数由编译器便宜行事。只要我们不通过编译选项 -fno-inline 明确禁止函数内联, 1326 行的 flag_really_no_inline 就是 0 。

 

cgraph_decide_inline (continue)

 

1320 #ifdef ENABLE_CHECKING

1321   for (node = cgraph_nodes ; node; node = node->next)

1322     if (node->aux || node->output)

1323       abort ();

1324 #endif

1325

1326   if (!flag_really_no_inline )

1327   {

1328     cgraph_decide_inlining_of_small_functions (inlined, inlined_callees);

1329 #ifdef ENABLE_CHECKING

1330     for (node = cgraph_nodes ; node; node = node->next)

1331       if (node->aux || node->output)

1332         abort ();

1333 #endif

1334

1335     if (cgraph_dump_file )

1336       fprintf (cgraph_dump_file , "\nDeciding on functions called once:\n");

 

这里,参数 inlined 与 inlined_callees 作为缓存使用,其在前面所设置的内容无关紧要,将在后面被改写。下面的分析中使用了 Fibonacci heaps 来根据展开所带来的指令数增量排序。关于 Fibonacci heaps 的细节这里不深究。

 

1100       static void

1101       cgraph_decide_inlining_of_small_functions (struct cgraph_node **inlined,      in cgraphunit.c

1102                                            struct cgraph_node **inlined_callees)

1103       {

1104         int i;

1105         struct cgraph_node *node;

1106         fibheap_t heap = fibheap_new ();

1107         struct fibnode **heap_node =

1108           xcalloc (cgraph_max_uid , sizeof (struct fibnode *));

1109         int ninlined, ninlined_callees;

1110   int max_insns = ((HOST_WIDEST_INT) initial_insns

1111               * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);

1112

1113   /* Put all inline candidates into the heap.  */

1114

1115   for (node = cgraph_nodes ; node; node = node->next)

1116   {

1117     if (!node->local.inlinable || !node->callers

1118         || node->local.disregard_inline_limits)

1119       continue ;

1120      

1121           if (!cgraph_default_inline_p (node))

1122           {

1123              cgraph_set_inline_failed (node,

1124                                   N_("--param max-inline-insns-single limit reached"));

1125              continue ;

1126            }

1127           heap_node[node->uid] =

1128                 fibheap_insert (heap, cgraph_estimate_growth (node), node);

1129         }

 

因为现在是编译器做主,那么函数能否内联,除了它本身适合内联外,还取决于其展开大小。对于声明为“ inline ”的函数,这个限制是 MAX_INLINE_INSNS_SINGLE (默认为 500 ),对于没有声明为“ inline ”的函数,其限制则是 MAX_INLINE_INSNS_AUTO (默认为 100 )。注意,单位是指令,不是行数。

 

1068 static bool

1069 cgraph_default_inline_p (struct cgraph_node *n)                                   in cgraphunit.c

1070 {

1071   if (!DECL_INLINE (n->decl) || !DECL_SAVED_TREE (n->decl))

1072     return false;

1073   if (DECL_DECLARED_INLINE_P (n->decl))

1074     return n->global.insns < MAX_INLINE_INSNS_SINGLE;

1075   else

1076     return n->global.insns < MAX_INLINE_INSNS_AUTO;

1077 }

 

对于不能内联的函数,同时要把其失败的原因记录到调用它的各条调用边上,除非该处调用已经被强制内联了(彼时,相应调用边的 inline_failed 为 NULL )。

 

1081 static void

1082 cgraph_set_inline_failed (struct cgraph_node *node, const char *reason)        in cgraphunit.c

1083 {

1084   struct cgraph_edge *e;

1085

1086   if (cgraph_dump_file )

1087     fprintf (cgraph_dump_file , "Inlining failed: %s\n", reason);

1088   for (e = node->callers; e; e = e->next_caller)

1089     if (e->inline_failed)

1090       e->inline_failed = reason;

1091 }

 

一旦函数的大小在限制之内,就把它加入 Fibonacci heaps ,不过作为键值使用的,则是展开它所带来的指令数的增加量。

 

918  static int

919  cgraph_estimate_growth (struct cgraph_node *node)                             in cgraphunit.c

920  {

921    int growth = 0;

922    int calls_saved = 0;

923    int clones_added = 0;

924    struct cgraph_edge *e;

925 

926    for (e = node->callers; e; e = e->next_caller)

927      if (e->inline_failed)

928      {

929        growth += ((cgraph_estimate_size_after_inlining (1, e->caller, node)

930                    -

931                   e->caller->global.insns) *e->caller->global.cloned_times);

932        calls_saved += e->caller->global.cloned_times;

933        clones_added += e->caller->global.cloned_times;

934      }

935 

936    /* ??? Wrong for self recursive functions or cases where we decide to not

937       inline for different reasons, but it is not big deal as in that case

938      we will keep the body around, but we will also avoid some inlining.  */

939    if (!node->needed && !node->origin && !DECL_EXTERNAL (node->decl))

940      growth -= node->global.insns, clones_added--;

941 

942    if (!calls_saved)

943      calls_saved = 1;

944 

945    return growth;

946  }

 

不过因为现在还没有对调用者、被调用者进行分析,这是个比较粗略的估算(没有考虑该函数所直接、间接调用函数的大小)。随着下面对函数分析的深入,这个估算会趋向准确。也正是因为下面将频繁改写节点的键值,出于效率考虑,采用了 Fibonacci heaps 。

在 1133 行, fibheap_extract_min 从 Fibonacci heaps 中移出键值最小的节点。在一开始,这个键值仅是该函数本身的指令数乘以被调用的次数。显然,从键值最小的函数开始处理,是很好的开始,它最有可能是在调用栈最底层的函数。

 

cgraph_decide_inlining_of_small_functions (continue)

 

1131         if (cgraph_dump_file )

1132           fprintf (cgraph_dump_file , "\nDeciding on smaller functions:\n");

1133         while (overall_insns <= max_insns && (node = fibheap_extract_min (heap)))

1134         {

1135           struct cgraph_edge *e;

1136           int old_insns = overall_insns ;

1137      

1138           heap_node[node->uid] = NULL;

1139           if (cgraph_dump_file )

1140             fprintf (cgraph_dump_file ,

1141                    "\nConsidering %s with %i insns\n"

1142                    " Estimated growth is %+i insns.\n",

1143                    cgraph_node_name (node), node->global.insns,

1144                    cgraph_estimate_growth (node));

1145           if (!cgraph_default_inline_p (node))

1146           {

1147              cgraph_set_inline_failed (node,

1148                 N_("--param max-inline-insns-single limit reached after inlining into the callee"));

1149              continue ;

1150           }

1151           ninlined_callees = cgraph_inlined_callees (node, inlined_callees);

1152           for (e = node->callers; e; e = e->next_caller)

1153             if (e->inline_failed)

1154                {

1155                  /* Marking recursive function inlinine has sane semantic and

1156                   thus we should not warn on it.  */

1157                  if (e->caller == node)

1158                {

1159                  e->inline_failed = "";

1160                 continue ;

1161                 }

1162                ninlined = cgraph_inlined_into (e->caller, inlined);

1163                  if (e->callee->output)

1164                    e->inline_failed = "";

1165                  if (e->callee->output

1166                  || !cgraph_check_inline_limits (e->caller, node, inlined,

1167                                            ninlined, &e->inline_failed))

1168                 {

1169                 for (i = 0; i < ninlined; i++)

1170                    inlined[i]->output = 0, inlined[i]->aux = 0;

1171                 if (cgraph_dump_file )

1172                    fprintf (cgraph_dump_file , " Not inlining into %s.\n",

1173                          cgraph_node_name (e->caller));

1174                 continue ;

1175                 }

1176                  cgraph_mark_inline (e->caller, node, inlined, ninlined,

1177                                 inlined_callees, ninlined_callees);

1178                  if (heap_node[e->caller->uid])

1179                    fibheap_replace_key (heap, heap_node[e->caller->uid],

1180                                    cgraph_estimate_growth (e->caller));

1181      

1182                /* Size of the functions we updated into has changed, so update

1183                     the keys.  */

1184                   for (i = 0; i < ninlined; i++)

1185                 {

1186                 inlined[i]->output = 0, inlined[i]->aux = 0;

1187                  if (heap_node[inlined[i]->uid])

1188                    fibheap_replace_key (heap, heap_node[inlined[i]->uid],

1189                                      cgraph_estimate_growth (inlined[i]));

1190                   }

1191                  if (cgraph_dump_file )

1192                    fprintf (cgraph_dump_file ,

1193                        " Inlined into %s which now has %i insns.\n",

1194                          cgraph_node_name (e->caller),

1195                          e->caller->global.insns);

1196                }

1197

1198            /* Similarly all functions called by the function we just inlined

1199              are now called more times; update keys.  */

1200      

1201     for (e = node->callees; e; e = e->next_callee)

1202       if (e->inline_failed && heap_node[e->callee->uid])

1203          fibheap_replace_key (heap, heap_node[e->callee->uid],

1204                            cgraph_estimate_growth (e->callee));

1205

1206     for (i = 0; i < ninlined_callees; i++)

1207     {

1208        struct cgraph_edge *e;

1209

1210        for (e = inlined_callees[i]->callees; e; e = e->next_callee)

1211                  if (e->inline_failed && heap_node[e->callee->uid])

1212            fibheap_replace_key (heap, heap_node[e->callee->uid],

1213                              cgraph_estimate_growth (e->callee));

1214

1215        inlined_callees[i]->output = 0;

1216        inlined_callees[i]->aux = 0;

1217     }

1218     if (cgraph_dump_file )

1219       fprintf (cgraph_dump_file ,

1220              " Inlined %i times for a net change of %+i insns.\n",

1221              node->global.cloned_times, overall_insns - old_insns);

1222   }

1223   while ((node = fibheap_extract_min (heap)) != NULL)

1224     if (!node->local.disregard_inline_limits)

1225       cgraph_set_inline_failed (node, N_("--param inline-unit-growth limit reached"));

1226   fibheap_delete (heap);

1227   free (heap_node);

1228 }

 

除了给估算展开大小比较小的函数优先处理外,编译器对内联函数基本上一视同仁,一旦展开到了一定程度,就不再允许内联了。这个规则,一方面是总体指令数的增加率,它由 1133 行的 max_insns 控制(默认情况下,编译器允许指令数 50% 的增长);另一方面则体现在 cgraph_check_inline_limits 中。

 

1018 static bool

1019 cgraph_check_inline_limits (struct cgraph_node *to, struct cgraph_node *what,

1020                         struct cgraph_node **inlined, int ninlined,

1021                          const char **reason)

1022 {

1023   int i;

1024   int times = 0;

1025   struct cgraph_edge *e;

1026   int newsize;

1027   int limit;

1028

1029   for (e = to->callees; e; e = e->next_callee)

1030     if (e->callee == what)

1031       times++;

1032

1033   /* When inlining large function body called once into small function,

1034     take the inlined function as base for limiting the growth.  */

1035   if (to->local.self_insns > what->local.self_insns)

1036     limit = to->local.self_insns;

1037   else

1038     limit = what->local.self_insns;

1039

1040   limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;

1041

1042   newsize = cgraph_estimate_size_after_inlining (times, to, what);

1043   if (newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)

1044       && newsize > limit)

1045   {

1046     *reason = N_("--param large-function-growth limit reached");

1047     return false;

1048   }

1049   for (i = 0; i < ninlined; i++)

1050   {

1051     newsize =

1052        cgraph_estimate_size_after_inlining (INLINED_TIMES (inlined[i]) *

1053                                       times, inlined[i], what);

1054     if (newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)

1055          && newsize >

1056            inlined[i]->local.self_insns *

1057            (100 + PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH)) / 100)

1058     {

1059        *reason = N_("--param large-function-growth limit reached while inlining the caller");

1060        return false;

1061     }

1062   }

1063   return true;

1064 }

 

PARAM_LARGE_FUNCTION_GROWTH 用于控制因为内联了大函数而导致该部分指令数增长的百分率(默认是 100% ),显然如果该大函数被内联超过 1 次就会超标。而 PARAM_LARGE_FUNCTION_INSNS 则是一个值,当函数的估算指令数超过这个值,就被认为是大函数。

1163 行的 output ,在目前的情形下,它是由 1151 行的 cgraph_inlined_callees 设置的。因为一开始 output 都是 0 ,而且在 1152 行的循环中,每次处理完都会重置相关节点的 output 域( 1170 及 1186 行),因此如果现在发现被调用函数的 output 已经设置了,显然还有别的函数内联了该函数,那么当前函数就不能被其调用者内联了(但依然内联这个被调用函数),以防止这个被调用函数的展开次数出现指数级增长。

一旦当前函数通过了 1165 ~ 1167 行的检查,就认为可以内联了,通过 cgraph_mark_inline 来更新相应的参数。作为结果,其调用者展开它的代价也就变了,所以还需要更新 Fibonacci heaps 中相应的节点(如果还在的话)。

在 cgraph_decide_inlining_of_small_functions 的最后,如果指令增长率超过了预设,那么剩下的函数一概不许内联。因为 heaps 以展开所增加的指令数排序,可以预见剩下的都是比较大的函数。

 

cgraph_decide_inline (continue)

 

1338     /* And finally decide what functions are called once.  */

1339

1340     for (i = nnodes - 1; i >= 0; i--)

1341     {

1342        node = order[i];

1343

1344        if (node->callers && !node->callers->next_caller && !node->needed

1345            && node->local.inlinable && node->callers->inline_failed

1346            && !DECL_EXTERNAL (node->decl) && !DECL_COMDAT (node->decl))

1347        {

1348          bool ok = true;

1349          struct cgraph_node *node1;

1350

1351          /* Verify that we won't duplicate the caller.  */

1352          for (node1 = node->callers->caller;

1353              node1->callers && !node1->callers->inline_failed

1354              && ok; node1 = node1->callers->caller)

1355           if (node1->callers->next_caller || node1->needed)

1356               ok = false;

1357          if (ok)

1358         {

1359            const char *dummy_reason;

1360            if (cgraph_dump_file )

1361              fprintf (cgraph_dump_file ,

1362                     "\nConsidering %s %i insns.\n"

1363                      " Called once from %s %i insns.\n",

1364                      cgraph_node_name (node), node->global.insns,

1365                      cgraph_node_name (node->callers->caller),

1366                      node->callers->caller->global.insns);

1367            ninlined = cgraph_inlined_into (node->callers->caller,

1368                                        inlined);

1369            old_insns = overall_insns ;

1370

1371            /* Inlining functions once would never cause inlining warnings.  */

1372            if (cgraph_check_inline_limits

1373                (node->callers->caller, node, inlined, ninlined,

1374                 &dummy_reason))

1375            {

1376              ninlined_callees =

1377                    cgraph_inlined_callees (node, inlined_callees);

1378              cgraph_mark_inline (node->callers->caller, node, inlined,

1379                                 ninlined, inlined_callees,

1380                                ninlined_callees);

1381              for (y = 0; y < ninlined_callees; y++)

1382               inlined_callees[y]->output = 0, inlined_callees[y]->aux = 0;

1383               if (cgraph_dump_file )

1384               fprintf (cgraph_dump_file ,

1385                      " Inlined into %s which now has %i insns"

1386                      " for a net change of %+i insns.\n",

1387                      cgraph_node_name (node->callers->caller),

1388                      node->callers->caller->global.insns,

1389                      overall_insns - old_insns);

1390            }

1391            else

1392            {

1393              if (cgraph_dump_file )

1394               fprintf (cgraph_dump_file ,

1395                      " Inline limit reached, not inlined.\n");

1396            }

1397            for (y = 0; y < ninlined; y++)

1398              inlined[y]->output = 0, inlined[y]->aux = 0;

1399         }

1400        }

1401     }

1402   }

1403   cgraph_remove_unreachable_nodes ();

1404

1405   if (cgraph_dump_file )

1406     fprintf (cgraph_dump_file ,

1407            "\nInlined %i calls, eliminated %i functions, "

1408            "%i insns turned to %i insns.\n\n",

1409            ncalls_inlined , nfunctions_inlined , initial_insns ,

1410            overall_insns );

1411   free (order);

1412   free (inlined);

1413   free (inlined_callees);

1414 }

 

回到 cgraph_decide_inline ,对于只被调用一次,而又在上面被冤杀的小函数,编译器还是要网开一面,毕竟内联这些小函数的利益还是比较大。在 1403 行,还要通过 cgraph_remove_unreachable_nodes 进行可达性分析,移除不再需要的函数。

5.13.5.3.2.3.  设置 cgraph_global_info_ready

回到 cgraph_optimize ,在上面的处理中 cgraph_node 节点的 global 部分已经得到设置,在 1596 行,设置 cgraph_global_info_ready 来显示这一事实。

 

你可能感兴趣的:(GCC-3.4.6源代码学习笔记(178))