GCC-3.4.6源代码学习笔记(174)

5.13.5.3.1.2.  分析函数能否内联

那么在构建好这部分调用关系网之后,开始真正分析这个函数了。主要的工作集中在分析该函数是否能内联。

 

cgraph_analyze_function (continue)

 

330    node->local.inlinable = tree_inlinable_function_p (decl);

331    if (!node->local.self_insns)

332      node->local.self_insns

333        = (*lang_hooks .tree_inlining.estimate_num_insns ) (decl);

334    if (node->local.inlinable)

335      node->local.disregard_inline_limits

336        = (*lang_hooks .tree_inlining.disregard_inline_limits) (decl);

337    for (e = node->callers; e; e = e->next_caller)

338      if (e->inline_failed)

339      {

340        if (node->local.redefined_extern_inline)

341          e->inline_failed = N_("redefined extern inline functions are not "

342                            "considered for inlining");

343        else if (!node->local.inlinable)

344          e->inline_failed = N_("function not inlinable");

345        else

346          e->inline_failed = N_("function not considered for inlining");

347      }

348    if (flag_really_no_inline && !node->local.disregard_inline_limits)

349      node->local.inlinable = 0;

350    /* Inlining characteristics are maintained by the cgraph_mark_inline.  */

351    node->global.insns = node->local.self_insns;

352    if (!DECL_EXTERNAL (decl))

353    {

354      node->global.cloned_times = 1;

355      node->global.will_be_output = true;

356    }

357 

358    node->analyzed = true;

359    current_function_decl = NULL;

360 

361    /* Possibly warn about unused parameters.  */

362    if (warn_unused_parameter )

363      do_warn_unused_parameter (decl);

364  }

 

330 行的 tree_inlinable_function_p 的定义如下:

 

988  bool

989  tree_inlinable_function_p (tree fn)                                                         in tree-inline.c

990  {

991    return inlinable_function_p (fn);

992  }

 

下面的 DECL_UNINLINABLE 如果不是 0 ,表明我们已经确知该函数是不能内联的。

 

1154       static bool

1155       inlinable_function_p (tree fn)                                                                in tree-inline.c

1156       {

1157         bool inlinable = true;

1158      

1159         /* If we've already decided this function shouldn't be inlined,

1160           there's no need to check again.  */

1161         if (DECL_UNINLINABLE (fn))

1162           return false;

1163      

1164         /* See if there is any language-specific reason it cannot be

1165           inlined. (It is important that this hook be called early because

1166           i n C++ it may result in template instantiation.)

1167           If the function is not inlinable for language-specific reasons,

1168           it is left up to the langhook to explain why.  */

1169         inlinable = !(*lang_hooks .tree_inlining.cannot_inline_tree_fn ) (&fn);

 

否则,首先由语言钩子做一个判断。对于 C++ ,钩子绑定了下面的函数。

 

2054 int

2055 cp_cannot_inline_tree_fn (tree* fnp)                                                                  in tree.c

2056 {

2057   tree fn = *fnp;

2058

2059   /* We can inline a template instantiation only if it's fully

2060     instantiated.  */

2061   if (DECL_TEMPLATE_INFO (fn)

2062       && TI_PENDING_TEMPLATE_FLAG (DECL_TEMPLATE_INFO (fn)))

2063   {

2064     /* Don't instantiate functions that are not going to be

2065        inlined.  */

2066     if (!DECL_INLINE (DECL_TEMPLATE_RESULT

2067                (template_for_substitution (fn))))

2068       return 1;

2069

2070     fn = *fnp = instantiate_decl (fn, /*defer_ok=*/ 0);

2071

2072     if (TI_PENDING_TEMPLATE_FLAG (DECL_TEMPLATE_INFO (fn)))

2073       return 1;

2074   }

2075

2076   if (flag_really_no_inline

2077       && lookup_attribute ("always_inline", DECL_ATTRIBUTES (fn)) == NULL)

2078     return 1;

2079

2080   /* Don't auto-inline anything that might not be bound within

2081     this unit of translation.  */

2082   if (!DECL_DECLARED_INLINE_P (fn) && !(*targetm .binds_local_p) (fn))

2083   {

2084     DECL_UNINLINABLE (fn) = 1;

2085     return 1;

2086   }

2087

2088   if (varargs_function_p (fn))

2089   {

2090     DECL_UNINLINABLE (fn) = 1;

2091     return 1;

2092   }

2093

2094   if (! function_attribute_inlinable_p (fn))

2095   {

2096     DECL_UNINLINABLE (fn) = 1;

2097     return 1;

2098   }

2099

2100   return 0;

2101 }

 

上面的 2062 行, TI_PENDING_TEMPLATE_FLAG 如果非 0 ,表示该函数模板具现还有待定的参数。在 2067 行的 template_for_substitution 返回对应的函数模板声明,其中如果 DECL_INLINE 为 1 ,表示应尽可能内联该函数。 DECL_INLINE 是前端视情况设置的,而 DECL_DECLARED_INLINE_P 如果非 0 ,则表示在函数声明中使用了“ inline ”关键字。

这里注意, DECL_INLINE 表示尽可能内联,而 DECL_UNINLINABLE 则表示不能内联。一旦设置了 DECL_UNINLINABLE , DECL_INLINE 就不起作用了。

下面的 flag_inline_trees 如果为 0 ,表示我们不能内联函数。默认的,这个变量的值为 1 。

 

inlinable_function_p (continue)

 

1171         /* If we don't have the function body available, we can't inline it.

1172           However, this should not be recorded since we also get here for

1173           forward declared inline functions. Therefore, return at once.  */

1174         if (!DECL_SAVED_TREE (fn))

1175           return false;

1176      

1177         /* If we're not inlining at all, then we cannot inline this function.  */

1178         else if (!flag_inline_trees )

1179           inlinable = false;

1180      

1181         /* Only try to inline functions if DECL_INLINE is set. This should be

1182           true for all functions declared `inline', and for all other functions

1183           as well with -finline-functions.

1184      

1185           Don't think of disregarding DECL_INLINE when flag_inline_trees == 2;

1186           it's the front-end that must set DECL_INLINE in this case, because

1187           dwarf2out loses if a function that does not have DECL_INLINE set is

1188            inlined anyway. That is why we have both DECL_INLINE and

1189           DECL_DECLARED_INLINE_P.  */

1190         /* FIXME: When flag_inline_trees dies, the check for flag_unit_at_a_time

1191           here should be redundant.  */

1192         else if (!DECL_INLINE (fn) && !flag_unit_at_a_time )

1193           inlinable = false;

1194      

1195        #ifdef INLINER_FOR_JAVA

1196         /* Synchronized methods can't be inlined. This is a bug.  */

1197         else if (METHOD_SYNCHRONIZED (fn))

1198           inlinable = false;

1199       #endif /* INLINER_FOR_JAVA */

1200      

1201   else if (inline_forbidden_p (fn))

1202   {

1203     /* See if we should warn about uninlinable functions. Previously,

1204       some of these warnings would be issued while trying to expand

1205        the function inline, but that would cause multiple warnings

1206        about functions that would for example call alloca. But since

1207        this a property of the function, just one warning is enough.

1208        As a bonus we can now give more details about the reason why a

1209        function is not inlinable.

1210        We only warn for functions declared `inline' by the user.  */

1211     bool do_warning = (warn_inline

1212                      && DECL_INLINE (fn)

1213                      && DECL_DECLARED_INLINE_P (fn)

1214                      && !DECL_IN_SYSTEM_HEADER (fn));

1215

1216     if (lookup_attribute ("always_inline",

1217                       DECL_ATTRIBUTES (fn)))

1218       sorry (inline_forbidden_reason , fn, fn);

1219     else if (do_warning)

1220       warning (inline_forbidden_reason , fn, fn);

1221

1222     inlinable = false;

1223   }

1224

1225   /* Squirrel away the result so that we don't have to check again.  */

1226   DECL_UNINLINABLE (fn) = !inlinable;

1227

1228   return inlinable;

1229 }

 

虽然前端和语言钩子都初步认定函数可以内联,但是该函数可能包含了不适合内联的内容。这需要下面的函数来确认。如果不能内联,该函数将返回导致不能内联的那个树节点(非 NULL );如果可以内联,则返回 NULL_TREE 。

 

1141       static tree

1142       inline_forbidden_p (tree fndecl)                                                            in tree-inline.c

1143       {

1144         location_t saved_loc = input_location ;

1145         tree ret = walk_tree_without_duplicates

1146                     (&DECL_SAVED_TREE (fndecl), inline_forbidden_p_1 , fndecl);

1147         input_location = saved_loc;

1148         return ret;

1149       }

 

同样通过 walk_tree 来遍历函数体。下面 1012 行, alloca_call_p 如果返回非 0 值,表示该函数调用了 alloca 函数(它在栈上分配指定大小的内存,在调用它的函数返回时,内存才被自动释放),因此除非用户强制要求(通过属性“ always_inline ”),不要内联这个函数。另外,如果调用了 setjmp / longjmp 函数,这个函数也是不能内联的。前面我们看到, GCC 自身带有内建函数,提供基本库函数原型的中间形式,这样我们可以不需要这些库函数的头文件,就可以调用它们。调用了可变参数内建函数,以及 setjmp / longjmp 内建函数的函数,同样也不能内联。

 

996  static tree

997  inline_forbidden_p_1 (tree *nodep, int *walk_subtrees ATTRIBUTE_UNUSED,

998                      void *fnp)

999  {

1000   tree node = *nodep;

1001   tree fn = (tree) fnp;

1002   tree t;

1003

1004   switch (TREE_CODE (node))

1005   {

1006     case CALL_EXPR:

1007        /* Refuse to inline alloca call unless user explicitly forced so as

1008          this may change program's memory overhead drastically when the

1009          function using alloca is called in loop. In GCC present in

1010          SPEC2000 inlining into schedule_block cause it to require 2GB of

1011                RAM instead of 256MB.  */

1012       if (alloca_call_p (node)

1013           && !lookup_attribute ("always_inline", DECL_ATTRIBUTES (fn)))

1014       {

1015          inline_forbidden_reason

1016             = N_("%Jfunction '%F' can never be inlined because it uses "

1017                  "alloca (override using the always_inline attribute)");

1018          return node;

1019       }

1020       t = get_callee_fndecl (node);

1021       if (! t)

1022         break ;

1023

1024

1025       /* We cannot inline functions that call setjmp.  */

1026       if (setjmp_call_p (t))

1027       {

1028          inline_forbidden_reason

1029              = N_("%Jfunction '%F' can never be inlined because it uses setjmp");

1030          return node;

1031       }

1032

1033       if (DECL_BUILT_IN (t))

1034         switch (DECL_FUNCTION_CODE (t))

1035          {

1036            /* We cannot inline functions that take a variable number of

1037              arguments.  */

1038             case BUILT_IN_VA_START:

1039            case BUILT_IN_STDARG_START:

1040            case BUILT_IN_NEXT_ARG:

1041            case BUILT_IN_VA_END:

1042            {

1043              inline_forbidden_reason

1044                = N_("%Jfunction '%F' can never be inlined because it "

1045                      "uses variable argument lists");

1046               return node;

1047            }

1048            case BUILT_IN_LONGJMP:

1049            {

1050               /* We can't inline functions that call __builtin_longjmp at

1051                all. The non-local goto machinery really requires the

1052                destination be in a different function. If we allow the

1053                function calling __builtin_longjmp to be inlined into the

1054                function calling __builtin_setjmp, Things will Go Awry.  */

1055              /* ??? Need front end help to identify "regular" non-local

1056                goto.  */

1057              if (DECL_BUILT_IN_CLASS (t) == BUILT_IN_NORMAL)

1058             {

1059                inline_forbidden_reason

1060                    = N_("%Jfunction '%F' can never be inlined because "

1061                         "it uses setjmp-longjmp exception handling");

1062                return node;

1063             }

1064            }

1065

1066            default :

1067              break ;

1068           }

1069        break ;

1070

1071 #ifndef INLINER_FOR_JAVA

1072     case DECL_STMT:

1073       /* We cannot inline functions that contain other functions.  */

1074       if (TREE_CODE (TREE_OPERAND (node, 0)) == FUNCTION_DECL

1075            && DECL_INITIAL (TREE_OPERAND (node, 0)))

1076       {

1077          inline_forbidden_reason

1078             = N_("%Jfunction '%F' can never be inlined "

1079                 "because it contains a nested function");

1080          return node;

1081       }

1082       break ;

1083

1084     case GOTO_STMT:

1085     case GOTO_EXPR:

1086       t = TREE_OPERAND (node, 0);

1087

1088       /* We will not inline a function which uses computed goto. The

1089          addresses of its local labels, which may be tucked into

1090          global storage, are of course not constant across

1091          instantiations, which causes unexpected behavior.  */

1092       if (TREE_CODE (t) != LABEL_DECL)

1093       {

1094          inline_forbidden_reason

1095             = N_("%Jfunction '%F' can never be inlined "

1096                  "because it contains a computed goto");

1097          return node;

1098       }

1099

1100       /* We cannot inline a nested function that jumps to a nonlocal

1101         label.  */

1102       if (TREE_CODE (t) == LABEL_DECL && DECL_CONTEXT (t) != fn)

1103              {

1104                 inline_forbidden_reason

1105                     = N_("%Jfunction '%F' can never be inlined "

1106                          "because it contains a nonlocal goto");

1107                 return node;

1108              }

1109

1110        break ;

1111

1112     case RECORD_TYPE:

1113     case UNION_TYPE:

1114       /* We cannot inline a function of the form

1115

1116          void F (int i) { struct S { int ar[i]; } s; }

1117

1118         Attempting to do so produces a catch-22.

1119         If walk_tree examines the TYPE_FIELDS chain of RECORD_TYPE/

1120                UNION_TYPE nodes, then it goes into infinite recursion on a

1121                 structure containing a pointer to its own type. If it doesn't,

1122                then the type node for S doesn't get adjusted properly when

1123                F is inlined, and we abort in find_function_data.  */

1124       for (t = TYPE_FIELDS (node); t; t = TREE_CHAIN (t))

1125                if (variably_modified_type_p (TREE_TYPE (t)))

1126                 {

1127                   inline_forbidden_reason

1128                      = N_("%Jfunction '%F' can never be inlined "

1129                           "because it uses variable sized variables");

1130                   return node;

1131                 }

1132 #endif

1133     default :

1134       break ;

1135   }

1136

1137   return NULL_TREE;

1138 }

 

除了上面的情况, GNU 提供了一个扩展,支持在函数内部定义嵌套函数;包含嵌套函数的函数也是不能内联的。另一个 GNU 提供的扩展是支持可计算 goto 表达式,比如:

void *ptr;

/* ... */

ptr = &&foo;

一元操作符“ && ”获取当前函数中定义的标签(上例的“ foo ”)的地址,表达式:“ goto *ptr; ”就是可计算 goto 表达式。包含了这种表达式的函数亦不能内联。

GNU 还有一个扩展,允许函数内定义的结构体或联合体包含大小可变的成员(例如长度取决于函数参数的数组),其中的一个形式如 1116 行注释所描述。这样的函数也是不能内联的。

如果上述检查都通过了,函数是否能内联可能还会受到大小上的限制,那么接着需要估算其包含的指令数,这个数据在后面优化时会用到。

 

5833 int

5834 c _estimate_num_insns (tree decl)                                                        in c-common.c

5835 {

5836    int num = 0;

5837    walk_tree_without_duplicates (&DECL_SAVED_TREE (decl), c_estimate_num_insns_1 , &num);

5838    return num;

5839 }

 

下面 MOVE_MAX_PIECES (对 x86 定义为 4 )定义了我们一次可以高效移动的字节数,相比较 MOVE_MAX (对 x86 定义为 16 )是单个指令可以移动的最大字节数。而如果一个内存到内存移动需要 MOVE_RATIO 对或更多的简单移动指令对,我们将代而使用 movstr 指令或 libcall (即库函数调用)。

 

5716 static tree

5717 c _estimate_num_insns_1 (tree *tp, int *walk_subtrees, void *data)

5718 {

5719    int *count = data;

5720    tree x = *tp;

5721

5722    if (TYPE_P (x) || DECL_P (x))

5723    {

5724      *walk_subtrees = 0;

5725      return NULL;

5726    }

5727    /* Assume that constants and references counts nothing. These should

5728        be majorized by amount of operations among them we count later

5729      and are common target of CSE and similar optimizations.  */

5730    if (TREE_CODE_CLASS (TREE_CODE (x)) == 'c'

5731        || TREE_CODE_CLASS (TREE_CODE (x)) == 'r')

5732      return NULL;

5733    switch (TREE_CODE (x))

5734    {

5735      /* Recognize assignments of large structures and constructors of

5736        big arrays.  */

5737      case MODIFY_EXPR:

5738      case CONSTRUCTOR:

5739      {

5740        HOST_WIDE_INT size;

5741

5742        size = int_size_in_bytes (TREE_TYPE (x));

5743

5744        if (size < 0 || size > MOVE_MAX_PIECES * MOVE_RATIO)

5745           *count += 10;

5746        else

5747           *count += ((size + MOVE_MAX_PIECES - 1) / MOVE_MAX_PIECES);

5748         }

5749       break ;

5750       case CALL_EXPR:

5751       {

5752        tree decl = get_callee_fndecl (x);

5753

5754        if (decl && DECL_BUILT_IN (decl))

5755             switch (DECL_FUNCTION_CODE (decl))

5756            {

5757               case BUILT_IN_CONSTANT_P:

5758                 *walk_subtrees = 0;

5759                 return NULL_TREE;

5760             case BUILT_IN_EXPECT:

5761                return NULL_TREE;

5762               default :

5763                 break ;

5764           }

5765        *count += 10;

5766        break ;

5767        }

5768       /* Few special cases of expensive operations. This is usefull

5769          to avoid inlining on functions having too many of these.  */

5770        case TRUNC_DIV_EXPR:

5771        case CEIL_DIV_EXPR:

5772        case FLOOR_DIV_EXPR:

5773        case ROUND_DIV_EXPR:

5774        case TRUNC_MOD_EXPR:

5775       case CEIL_MOD_EXPR:

5776        case FLOOR_MOD_EXPR:

5777        case ROUND_MOD_EXPR:

5778        case RDIV_EXPR:

5779          *count += 10;

5780         break ;

5781       /* Various containers that will produce no code themselves.  */

5782        case INIT_EXPR:

5783        case TARGET_EXPR:

5784       case BIND_EXPR:

5785       case BLOCK:

5786       case TREE_LIST:

5787        case TREE_VEC:

5788        case IDENTIFIER_NODE:

5789       case PLACEHOLDER_EXPR:

5790        case WITH_CLEANUP_EXPR:

5791        case CLEANUP_POINT_EXPR:

5792       case NOP_EXPR:

5793       case VIEW_CONVERT_EXPR:

5794        case SAVE_EXPR:

5795        case UNSAVE_EXPR:

5796        case COMPLEX_EXPR:

5797        case REALPART_EXPR:

5798       case IMAGPART_EXPR:

5799       case TRY_CATCH_EXPR:

5800        case TRY_FINALLY_EXPR:

5801        case LABEL_EXPR:

5802       case EXIT_EXPR:

5803        case LABELED_BLOCK_EXPR:

5804        case EXIT_BLOCK_EXPR:

5805       case EXPR_WITH_FILE_LOCATION:

5806

5807       case EXPR_STMT:

5808        case COMPOUND_STMT:

5809        case RETURN_STMT:

5810       case LABEL_STMT:

5811       case SCOPE_STMT:

5812        case FILE_STMT:

5813        case CASE_LABEL:

5814       case STMT_EXPR:

5815        case CLEANUP_STMT:

5816

5817       case SIZEOF_EXPR:

5818        case ARROW_EXPR:

5819        case ALIGNOF_EXPR:

5820          break ;

5821        case DECL_STMT:

5822          /* Do not account static initializers.  */

5823          if (TREE_STATIC (TREE_OPERAND (x, 0)))

5824          *walk_subtrees = 0;

5825         break ;

5826        default :

5827          (*count)++;

5828    }

5829    return NULL;

5830 }

 

接下来在 cgraph_analyze_function 的 336 行,钩子 disregard_inline_limits 确定这个函数是否应该被内联,即便它超过内联的大小限制。这里对于 C++ ,这个钩子绑定到函数 lhd_tree_inlining_disregard_inline_limits ,具有“ always_inline ”属性的函数就属于这个例外。

从 cgraph_analyze_function 返回到 cgraph_finalize_compilation_unit 之后,因为这个函数已明确会被调用,那么遍历其调用的函数,把它们都设置为可到达。同时, record_call_1 有可能向 cgraph_varpool_nodes_queue 队列加入对象,因此调用 cgraph_varpool_assemble_pending_decls 来输出这些对象的汇编。

 

cgraph_finalize_compilation_unit (continue)

 

423    /* Collect entry points to the unit.  */

424 

425    if (cgraph_dump_file )

426    {

427      fprintf (cgraph_dump_file , "Unit entry points:");

428      for (node = cgraph_nodes ; node; node = node->next)

429        if (node->needed && DECL_SAVED_TREE (node->decl))

430      fprintf (cgraph_dump_file , " %s", cgraph_node_name (node));

431      fprintf (cgraph_dump_file , "\n\nInitial ");

432      dump_cgraph (cgraph_dump_file );

433    }

434 

435    if (cgraph_dump_file )

436      fprintf (cgraph_dump_file , "\nReclaiming functions:");

437 

438    for (node = cgraph_nodes ; node; node = node->next)

439    {

440      tree decl = node->decl;

441 

442      if (!node->reachable && DECL_SAVED_TREE (decl))

443      {

444        cgraph_remove_node (node);

445        if (cgraph_dump_file )

446          fprintf (cgraph_dump_file , " %s", cgraph_node_name (node));

447      }

448      else

449        node->next_needed = NULL;

450    }

451    if (cgraph_dump_file )

452    {

453      fprintf (cgraph_dump_file , "\n\nReclaimed ");

454      dump_cgraph (cgraph_dump_file );

455    }

456    ggc_collect ();

457    timevar_pop (TV_CGRAPH);

458  }

 

完成了对 cgraph_nodes_queue 的遍历之后,在该队列中剩下的函数,要么还没有函数体,要么没人调用。在 438 行,把后者从 cgraph 网中除去。

 

你可能感兴趣的:(function,tree,File,attributes,Duplicates,compilation)