那么在构建好这部分调用关系网之后,就开始真正分析这个函数了。主要的工作集中在判断该函数是否能够内联。
cgraph_analyze_function (continue)
330 node->local.inlinable = tree_inlinable_function_p (decl);
331 if (!node->local.self_insns)
332 node->local.self_insns
333 = (*lang_hooks .tree_inlining.estimate_num_insns ) (decl);
334 if (node->local.inlinable)
335 node->local.disregard_inline_limits
336 = (*lang_hooks .tree_inlining.disregard_inline_limits) (decl);
337 for (e = node->callers; e; e = e->next_caller)
338 if (e->inline_failed)
339 {
340 if (node->local.redefined_extern_inline)
341 e->inline_failed = N_("redefined extern inline functions are not "
342 "considered for inlining");
343 else if (!node->local.inlinable)
344 e->inline_failed = N_("function not inlinable");
345 else
346 e->inline_failed = N_("function not considered for inlining");
347 }
348 if (flag_really_no_inline && !node->local.disregard_inline_limits)
349 node->local.inlinable = 0;
350 /* Inlining characteristics are maintained by the cgraph_mark_inline. */
351 node->global.insns = node->local.self_insns;
352 if (!DECL_EXTERNAL (decl))
353 {
354 node->global.cloned_times = 1;
355 node->global.will_be_output = true;
356 }
357
358 node->analyzed = true;
359 current_function_decl = NULL;
360
361 /* Possibly warn about unused parameters. */
362 if (warn_unused_parameter )
363 do_warn_unused_parameter (decl);
364 }
330 行的 tree_inlinable_function_p 的定义如下:
988 bool
989 tree_inlinable_function_p (tree fn) in tree-inline.c
990 {
991 return inlinable_function_p (fn);
992 }
下面的 DECL_UNINLINABLE 如果不是 0 ,表明我们已经确知该函数是不能内联的。
1154 static bool
1155 inlinable_function_p (tree fn) in tree-inline.c
1156 {
1157 bool inlinable = true;
1158
1159 /* If we've already decided this function shouldn't be inlined,
1160 there's no need to check again. */
1161 if (DECL_UNINLINABLE (fn))
1162 return false;
1163
1164 /* See if there is any language-specific reason it cannot be
1165 inlined. (It is important that this hook be called early because
1166 in C++ it may result in template instantiation.)
1167 If the function is not inlinable for language-specific reasons,
1168 it is left up to the langhook to explain why. */
1169 inlinable = !(*lang_hooks .tree_inlining.cannot_inline_tree_fn ) (&fn);
否则,首先由语言钩子做一个判断。对于 C++ ,钩子绑定了下面的函数。
2054 int
2055 cp_cannot_inline_tree_fn (tree* fnp) in tree.c
2056 {
2057 tree fn = *fnp;
2058
2059 /* We can inline a template instantiation only if it's fully
2060 instantiated. */
2061 if (DECL_TEMPLATE_INFO (fn)
2062 && TI_PENDING_TEMPLATE_FLAG (DECL_TEMPLATE_INFO (fn)))
2063 {
2064 /* Don't instantiate functions that are not going to be
2065 inlined. */
2066 if (!DECL_INLINE (DECL_TEMPLATE_RESULT
2067 (template_for_substitution (fn))))
2068 return 1;
2069
2070 fn = *fnp = instantiate_decl (fn, /*defer_ok=*/ 0);
2071
2072 if (TI_PENDING_TEMPLATE_FLAG (DECL_TEMPLATE_INFO (fn)))
2073 return 1;
2074 }
2075
2076 if (flag_really_no_inline
2077 && lookup_attribute ("always_inline", DECL_ATTRIBUTES (fn)) == NULL)
2078 return 1;
2079
2080 /* Don't auto-inline anything that might not be bound within
2081 this unit of translation. */
2082 if (!DECL_DECLARED_INLINE_P (fn) && !(*targetm .binds_local_p) (fn))
2083 {
2084 DECL_UNINLINABLE (fn) = 1;
2085 return 1;
2086 }
2087
2088 if (varargs_function_p (fn))
2089 {
2090 DECL_UNINLINABLE (fn) = 1;
2091 return 1;
2092 }
2093
2094 if (! function_attribute_inlinable_p (fn))
2095 {
2096 DECL_UNINLINABLE (fn) = 1;
2097 return 1;
2098 }
2099
2100 return 0;
2101 }
上面的 2062 行, TI_PENDING_TEMPLATE_FLAG 如果非 0 ,表示该模板具现尚处于待定状态(需要具现但尚未完成具现);正如 2059 行的注释所说,只有完全具现的函数才能内联,因此在 2070 行尝试立即完成具现。在 2067 行的 template_for_substitution 返回对应的函数模板声明,其中如果 DECL_INLINE 是 1 ,表示应尽可能内联该函数。 DECL_INLINE 是前端视情况设置的,而 DECL_DECLARED_INLINE_P 如果非 0 ,则表示在函数声明中使用了“ inline ”关键字。
这里注意, DECL_INLINE 表示尽可能内联,而 DECL_UNINLINABLE 则表示不能内联。一旦设置了 DECL_UNINLINABLE , DECL_INLINE 就不起作用了。
下面的 flag_inline_trees 如果为 0 ,表示我们不能内联函数。默认的,这个变量的值为 1 。
inlinable_function_p (continue)
1171 /* If we don't have the function body available, we can't inline it.
1172 However, this should not be recorded since we also get here for
1173 forward declared inline functions. Therefore, return at once. */
1174 if (!DECL_SAVED_TREE (fn))
1175 return false;
1176
1177 /* If we're not inlining at all, then we cannot inline this function. */
1178 else if (!flag_inline_trees )
1179 inlinable = false;
1180
1181 /* Only try to inline functions if DECL_INLINE is set. This should be
1182 true for all functions declared `inline', and for all other functions
1183 as well with -finline-functions.
1184
1185 Don't think of disregarding DECL_INLINE when flag_inline_trees == 2;
1186 it's the front-end that must set DECL_INLINE in this case, because
1187 dwarf2out loses if a function that does not have DECL_INLINE set is
1188 inlined anyway. That is why we have both DECL_INLINE and
1189 DECL_DECLARED_INLINE_P. */
1190 /* FIXME: When flag_inline_trees dies, the check for flag_unit_at_a_time
1191 here should be redundant. */
1192 else if (!DECL_INLINE (fn) && !flag_unit_at_a_time )
1193 inlinable = false;
1194
1195 #ifdef INLINER_FOR_JAVA
1196 /* Synchronized methods can't be inlined. This is a bug. */
1197 else if (METHOD_SYNCHRONIZED (fn))
1198 inlinable = false;
1199 #endif /* INLINER_FOR_JAVA */
1200
1201 else if (inline_forbidden_p (fn))
1202 {
1203 /* See if we should warn about uninlinable functions. Previously,
1204 some of these warnings would be issued while trying to expand
1205 the function inline, but that would cause multiple warnings
1206 about functions that would for example call alloca. But since
1207 this a property of the function, just one warning is enough.
1208 As a bonus we can now give more details about the reason why a
1209 function is not inlinable.
1210 We only warn for functions declared `inline' by the user. */
1211 bool do_warning = (warn_inline
1212 && DECL_INLINE (fn)
1213 && DECL_DECLARED_INLINE_P (fn)
1214 && !DECL_IN_SYSTEM_HEADER (fn));
1215
1216 if (lookup_attribute ("always_inline",
1217 DECL_ATTRIBUTES (fn)))
1218 sorry (inline_forbidden_reason , fn, fn);
1219 else if (do_warning)
1220 warning (inline_forbidden_reason , fn, fn);
1221
1222 inlinable = false;
1223 }
1224
1225 /* Squirrel away the result so that we don't have to check again. */
1226 DECL_UNINLINABLE (fn) = !inlinable;
1227
1228 return inlinable;
1229 }
虽然前端和语言钩子都初步认定函数可以内联,但是该函数可能包含了不适合内联的内容。这需要下面的函数来确认。如果不能内联,该函数将返回函数体中导致不能内联的那个节点(非 NULL );如果没有发现妨碍内联的内容,则返回 NULL_TREE 。
1141 static tree
1142 inline_forbidden_p (tree fndecl) in tree-inline.c
1143 {
1144 location_t saved_loc = input_location ;
1145 tree ret = walk_tree_without_duplicates
1146 (&DECL_SAVED_TREE (fndecl), inline_forbidden_p_1 , fndecl);
1147 input_location = saved_loc;
1148 return ret;
1149 }
同样通过 walk_tree 来遍历函数体。下面 1012 行, alloca_call_p 如果返回非 0 值,表示该函数调用了 alloca 函数(它在栈上分配指定大小的内存,直到调用它的函数返回时,这块内存才被自动释放),因此除非用户强制要求(通过属性“ always_inline ”),不要内联这个函数。另外,如果调用了 setjmp 函数(由 1026 行的 setjmp_call_p 判定),这个函数也是不能内联的。前面我们看到, GCC 自身带有内建函数,提供基本库函数原型的中间形式,这样我们可以不需要这些库函数的头文件,就可以调用它们。调用了处理可变参数的内建函数( va_start 、 va_end 等),或者调用了内建函数 longjmp 的函数,同样也不能内联。
996 static tree
997 inline_forbidden_p_1 (tree *nodep, int *walk_subtrees ATTRIBUTE_UNUSED,
998 void *fnp)
999 {
1000 tree node = *nodep;
1001 tree fn = (tree) fnp;
1002 tree t;
1003
1004 switch (TREE_CODE (node))
1005 {
1006 case CALL_EXPR:
1007 /* Refuse to inline alloca call unless user explicitly forced so as
1008 this may change program's memory overhead drastically when the
1009 function using alloca is called in loop. In GCC present in
1010 SPEC2000 inlining into schedule_block cause it to require 2GB of
1011 RAM instead of 256MB. */
1012 if (alloca_call_p (node)
1013 && !lookup_attribute ("always_inline", DECL_ATTRIBUTES (fn)))
1014 {
1015 inline_forbidden_reason
1016 = N_("%Jfunction '%F' can never be inlined because it uses "
1017 "alloca (override using the always_inline attribute)");
1018 return node;
1019 }
1020 t = get_callee_fndecl (node);
1021 if (! t)
1022 break ;
1023
1024
1025 /* We cannot inline functions that call setjmp. */
1026 if (setjmp_call_p (t))
1027 {
1028 inline_forbidden_reason
1029 = N_("%Jfunction '%F' can never be inlined because it uses setjmp");
1030 return node;
1031 }
1032
1033 if (DECL_BUILT_IN (t))
1034 switch (DECL_FUNCTION_CODE (t))
1035 {
1036 /* We cannot inline functions that take a variable number of
1037 arguments. */
1038 case BUILT_IN_VA_START:
1039 case BUILT_IN_STDARG_START:
1040 case BUILT_IN_NEXT_ARG:
1041 case BUILT_IN_VA_END:
1042 {
1043 inline_forbidden_reason
1044 = N_("%Jfunction '%F' can never be inlined because it "
1045 "uses variable argument lists");
1046 return node;
1047 }
1048 case BUILT_IN_LONGJMP:
1049 {
1050 /* We can't inline functions that call __builtin_longjmp at
1051 all. The non-local goto machinery really requires the
1052 destination be in a different function. If we allow the
1053 function calling __builtin_longjmp to be inlined into the
1054 function calling __builtin_setjmp, Things will Go Awry. */
1055 /* ??? Need front end help to identify "regular" non-local
1056 goto. */
1057 if (DECL_BUILT_IN_CLASS (t) == BUILT_IN_NORMAL)
1058 {
1059 inline_forbidden_reason
1060 = N_("%Jfunction '%F' can never be inlined because "
1061 "it uses setjmp-longjmp exception handling");
1062 return node;
1063 }
1064 }
1065
1066 default :
1067 break ;
1068 }
1069 break ;
1070
1071 #ifndef INLINER_FOR_JAVA
1072 case DECL_STMT:
1073 /* We cannot inline functions that contain other functions. */
1074 if (TREE_CODE (TREE_OPERAND (node, 0)) == FUNCTION_DECL
1075 && DECL_INITIAL (TREE_OPERAND (node, 0)))
1076 {
1077 inline_forbidden_reason
1078 = N_("%Jfunction '%F' can never be inlined "
1079 "because it contains a nested function");
1080 return node;
1081 }
1082 break ;
1083
1084 case GOTO_STMT:
1085 case GOTO_EXPR:
1086 t = TREE_OPERAND (node, 0);
1087
1088 /* We will not inline a function which uses computed goto. The
1089 addresses of its local labels, which may be tucked into
1090 global storage, are of course not constant across
1091 instantiations, which causes unexpected behavior. */
1092 if (TREE_CODE (t) != LABEL_DECL)
1093 {
1094 inline_forbidden_reason
1095 = N_("%Jfunction '%F' can never be inlined "
1096 "because it contains a computed goto");
1097 return node;
1098 }
1099
1100 /* We cannot inline a nested function that jumps to a nonlocal
1101 label. */
1102 if (TREE_CODE (t) == LABEL_DECL && DECL_CONTEXT (t) != fn)
1103 {
1104 inline_forbidden_reason
1105 = N_("%Jfunction '%F' can never be inlined "
1106 "because it contains a nonlocal goto");
1107 return node;
1108 }
1109
1110 break ;
1111
1112 case RECORD_TYPE:
1113 case UNION_TYPE:
1114 /* We cannot inline a function of the form
1115
1116 void F (int i) { struct S { int ar[i]; } s; }
1117
1118 Attempting to do so produces a catch-22.
1119 If walk_tree examines the TYPE_FIELDS chain of RECORD_TYPE/
1120 UNION_TYPE nodes, then it goes into infinite recursion on a
1121 structure containing a pointer to its own type. If it doesn't,
1122 then the type node for S doesn't get adjusted properly when
1123 F is inlined, and we abort in find_function_data. */
1124 for (t = TYPE_FIELDS (node); t; t = TREE_CHAIN (t))
1125 if (variably_modified_type_p (TREE_TYPE (t)))
1126 {
1127 inline_forbidden_reason
1128 = N_("%Jfunction '%F' can never be inlined "
1129 "because it uses variable sized variables");
1130 return node;
1131 }
1132 #endif
1133 default :
1134 break ;
1135 }
1136
1137 return NULL_TREE;
1138 }
除了上面的情况, GNU 提供了一个扩展,支持在函数内部定义嵌套函数;包含嵌套函数定义的函数也是不能内联的。另一个 GNU 提供的扩展是支持可计算 goto 表达式,比如:
void *ptr;
/* ... */
ptr = &&foo;
一元操作符“ && ”获取当前函数中定义的标签(上例的“ foo ”)的地址,表达式:“ goto *ptr; ”就是可计算 goto 表达式。包含了这种表达式的函数亦不能内联。
GNU 还有一个扩展,支持在函数内定义含有大小可变成员的结构体或联合体(成员的大小依赖于运行时的值),其中的一个形式如 1116 行注释所描述。使用了这种类型的函数也是不能内联的。
如果上述检查都通过了,函数是否能内联可能还会受到大小上的限制,那么接着需要估算其包含的指令数,这个数据在后面优化时会用到。
5833 int
5834 c_estimate_num_insns (tree decl) in c-common.c
5835 {
5836 int num = 0;
5837 walk_tree_without_duplicates (&DECL_SAVED_TREE (decl), c_estimate_num_insns_1 , &num);
5838 return num;
5839 }
下面的 MOVE_MAX_PIECES ( x86 定义为 4 )定义了我们一次可以高效移动的字节数;相比之下, MOVE_MAX ( x86 定义为 16 )是单条指令可以移动的最大字节数。而如果一次内存到内存的移动需要 MOVE_RATIO 对或更多的简单移动指令,我们将转而使用 movstr 或 libcall (即函数调用)。因此在 5744 行,当对象大小未知(小于 0 )或超过 MOVE_MAX_PIECES * MOVE_RATIO 时,按 10 条指令估算;否则按所需的移动次数估算。
5716 static tree
5717 c_estimate_num_insns_1 (tree *tp, int *walk_subtrees, void *data)
5718 {
5719 int *count = data;
5720 tree x = *tp;
5721
5722 if (TYPE_P (x) || DECL_P (x))
5723 {
5724 *walk_subtrees = 0;
5725 return NULL;
5726 }
5727 /* Assume that constants and references counts nothing. These should
5728 be majorized by amount of operations among them we count later
5729 and are common target of CSE and similar optimizations. */
5730 if (TREE_CODE_CLASS (TREE_CODE (x)) == 'c'
5731 || TREE_CODE_CLASS (TREE_CODE (x)) == 'r')
5732 return NULL;
5733 switch (TREE_CODE (x))
5734 {
5735 /* Recognize assignments of large structures and constructors of
5736 big arrays. */
5737 case MODIFY_EXPR:
5738 case CONSTRUCTOR:
5739 {
5740 HOST_WIDE_INT size;
5741
5742 size = int_size_in_bytes (TREE_TYPE (x));
5743
5744 if (size < 0 || size > MOVE_MAX_PIECES * MOVE_RATIO)
5745 *count += 10;
5746 else
5747 *count += ((size + MOVE_MAX_PIECES - 1) / MOVE_MAX_PIECES);
5748 }
5749 break ;
5750 case CALL_EXPR:
5751 {
5752 tree decl = get_callee_fndecl (x);
5753
5754 if (decl && DECL_BUILT_IN (decl))
5755 switch (DECL_FUNCTION_CODE (decl))
5756 {
5757 case BUILT_IN_CONSTANT_P:
5758 *walk_subtrees = 0;
5759 return NULL_TREE;
5760 case BUILT_IN_EXPECT:
5761 return NULL_TREE;
5762 default :
5763 break ;
5764 }
5765 *count += 10;
5766 break ;
5767 }
5768 /* Few special cases of expensive operations. This is usefull
5769 to avoid inlining on functions having too many of these. */
5770 case TRUNC_DIV_EXPR:
5771 case CEIL_DIV_EXPR:
5772 case FLOOR_DIV_EXPR:
5773 case ROUND_DIV_EXPR:
5774 case TRUNC_MOD_EXPR:
5775 case CEIL_MOD_EXPR:
5776 case FLOOR_MOD_EXPR:
5777 case ROUND_MOD_EXPR:
5778 case RDIV_EXPR:
5779 *count += 10;
5780 break ;
5781 /* Various containers that will produce no code themselves. */
5782 case INIT_EXPR:
5783 case TARGET_EXPR:
5784 case BIND_EXPR:
5785 case BLOCK:
5786 case TREE_LIST:
5787 case TREE_VEC:
5788 case IDENTIFIER_NODE:
5789 case PLACEHOLDER_EXPR:
5790 case WITH_CLEANUP_EXPR:
5791 case CLEANUP_POINT_EXPR:
5792 case NOP_EXPR:
5793 case VIEW_CONVERT_EXPR:
5794 case SAVE_EXPR:
5795 case UNSAVE_EXPR:
5796 case COMPLEX_EXPR:
5797 case REALPART_EXPR:
5798 case IMAGPART_EXPR:
5799 case TRY_CATCH_EXPR:
5800 case TRY_FINALLY_EXPR:
5801 case LABEL_EXPR:
5802 case EXIT_EXPR:
5803 case LABELED_BLOCK_EXPR:
5804 case EXIT_BLOCK_EXPR:
5805 case EXPR_WITH_FILE_LOCATION:
5806
5807 case EXPR_STMT:
5808 case COMPOUND_STMT:
5809 case RETURN_STMT:
5810 case LABEL_STMT:
5811 case SCOPE_STMT:
5812 case FILE_STMT:
5813 case CASE_LABEL:
5814 case STMT_EXPR:
5815 case CLEANUP_STMT:
5816
5817 case SIZEOF_EXPR:
5818 case ARROW_EXPR:
5819 case ALIGNOF_EXPR:
5820 break ;
5821 case DECL_STMT:
5822 /* Do not account static initializers. */
5823 if (TREE_STATIC (TREE_OPERAND (x, 0)))
5824 *walk_subtrees = 0;
5825 break ;
5826 default :
5827 (*count)++;
5828 }
5829 return NULL;
5830 }
接下来在 cgraph_analyze_function 的 336 行,钩子 disregard_inline_limits 确定这个函数是否应该被内联,即便它超过内联的大小限制。这里对于 C++ ,这个钩子绑定到函数 lhd_tree_inlining_disregard_inline_limits ,具有“ always_inline ”属性的函数就属于这个例外。
从 cgraph_analyze_function 返回 cgraph_finalize_compilation_unit ,因为这个函数已明确会被调用,那么遍历其调用的函数,把它们都设置为可到达。同时, record_call_1 有可能向 cgraph_varpool_nodes_queue 队列加入对象,调用 cgraph_varpool_assemble_pending_decls 来输出这些对象的汇编。
cgraph_finalize_compilation_unit (continue)
423 /* Collect entry points to the unit. */
424
425 if (cgraph_dump_file )
426 {
427 fprintf (cgraph_dump_file , "Unit entry points:");
428 for (node = cgraph_nodes ; node; node = node->next)
429 if (node->needed && DECL_SAVED_TREE (node->decl))
430 fprintf (cgraph_dump_file , " %s", cgraph_node_name (node));
431 fprintf (cgraph_dump_file , "\n\nInitial ");
432 dump_cgraph (cgraph_dump_file );
433 }
434
435 if (cgraph_dump_file )
436 fprintf (cgraph_dump_file , "\nReclaiming functions:");
437
438 for (node = cgraph_nodes ; node; node = node->next)
439 {
440 tree decl = node->decl;
441
442 if (!node->reachable && DECL_SAVED_TREE (decl))
443 {
444 cgraph_remove_node (node);
445 if (cgraph_dump_file )
446 fprintf (cgraph_dump_file , " %s", cgraph_node_name (node));
447 }
448 else
449 node->next_needed = NULL;
450 }
451 if (cgraph_dump_file )
452 {
453 fprintf (cgraph_dump_file , "\n\nReclaimed ");
454 dump_cgraph (cgraph_dump_file );
455 }
456 ggc_collect ();
457 timevar_pop (TV_CGRAPH);
458 }
完成了对 cgraph_nodes_queue 的遍历之后, cgraph_nodes 中没有被标记为可到达( reachable )的函数,要么还没有函数体,要么没人调用。从 438 行开始的循环,把后者(有函数体但不可到达的节点)从 cgraph 网中除去。