Studying note of GCC-3.4.6 source (174)

5.13.5.3.1.2.  Analyze function in inlinability

Thus, after generating the call-map for its callers and callees, it begins to analyze the function. The major effort focuses on analyzing whether the function is inlinable.

 

cgraph_analyze_function (continue)

 

330    node->local.inlinable = tree_inlinable_function_p (decl);

331    if (!node->local.self_insns)

332      node->local.self_insns

333        = (*lang_hooks .tree_inlining.estimate_num_insns ) (decl);

334    if (node->local.inlinable)

335      node->local.disregard_inline_limits

336        = (*lang_hooks .tree_inlining.disregard_inline_limits) (decl);

337    for (e = node->callers; e; e = e->next_caller)

338      if (e->inline_failed)

339      {

340        if (node->local.redefined_extern_inline)

341          e->inline_failed = N_("redefined extern inline functions are not "

342                            "considered for inlining");

343        else if (!node->local.inlinable)

344          e->inline_failed = N_("function not inlinable");

345        else

346          e->inline_failed = N_("function not considered for inlining");

347      }

348    if (flag_really_no_inline && !node->local.disregard_inline_limits)

349      node->local.inlinable = 0;

350    /* Inlining characteristics are maintained by the cgraph_mark_inline.  */

351    node->global.insns = node->local.self_insns;

352    if (!DECL_EXTERNAL (decl))

353    {

354      node->global.cloned_times = 1;

355      node->global.will_be_output = true;

356    }

357 

358    node->analyzed = true;

359    current_function_decl = NULL;

360 

361    /* Possibly warn about unused parameters.  */

362    if (warn_unused_parameter )

363       do_warn_unused_parameter (decl);

364  }

 

At line 330, tree_inlinable_function_p has the definition below:

 

988  bool

989  tree_inlinable_function_p (tree fn)                                                         in tree-inline.c

990  {

991    return inlinable_function_p (fn);

992  }

 

Below, if DECL_UNINLINABLE is nonzero, it means we have already determined that the function is not inlinable, so there is no need to check again.

 

1154       static bool

1155       inlinable_function_p (tree fn)                                                                in tree-inline.c

1156       {

1157         bool inlinable = true;

1158      

1159         /* If we've already decided this function shouldn't be inlined,

1160           there's no need to check again.  */

1161         if (DECL_UNINLINABLE (fn))

1162           return false;

1163      

1164         /* See if there is any language-specific reason it cannot be

1165           inlined. (It is important that this hook be called early because

1166           in C++ it may result in template instantiation.)

1167           If the function is not inlinable for language-specific reasons,

1168           it is left up to the langhook to explain why.  */

1169         inlinable = !(*lang_hooks .tree_inlining.cannot_inline_tree_fn ) (&fn);

 

Otherwise, a decision is first made by the language hook, which in C++ is bound to the following function.

 

2054 int

2055 cp_cannot_inline_tree_fn (tree* fnp)                                                                  in tree.c

2056 {

2057   tree fn = *fnp;

2058

2059   /* We can inline a template instantiation only if it's fully

2060     instantiated.  */

2061   if (DECL_TEMPLATE_INFO (fn)

2062       && TI_PENDING_TEMPLATE_FLAG (DECL_TEMPLATE_INFO (fn)))

2063   {

2064     /* Don't instantiate functions that are not going to be

2065        inlined.  */

2066     if (!DECL_INLINE (DECL_TEMPLATE_RESULT

2067                (template_for_substitution (fn))))

2068       return 1;

2069

2070     fn = *fnp = instantiate_decl (fn, /*defer_ok=*/ 0);

2071

2072     if (TI_PENDING_TEMPLATE_FLAG (DECL_TEMPLATE_INFO (fn)))

2073       return 1;

2074   }

2075

2076   if (flag_really_no_inline

2077       && lookup_attribute ("always_inline", DECL_ATTRIBUTES (fn)) == NULL)

2078     return 1;

2079

2080   /* Don't auto-inline anything that might not be bound within

2081     this unit of translation.  */

2082   if (!DECL_DECLARED_INLINE_P (fn) && !(*targetm .binds_local_p) (fn))

2083   {

2084     DECL_UNINLINABLE (fn) = 1;

2085     return 1;

2086   }

2087

2088   if (varargs_function_p (fn))

2089   {

2090     DECL_UNINLINABLE (fn) = 1;

2091     return 1;

2092   }

2093

2094   if (! function_attribute_inlinable_p (fn))

2095   {

2096     DECL_UNINLINABLE (fn) = 1;

2097     return 1;

2098   }

2099

2100   return 0;

2101 }

 

Above, at line 2062, a nonzero TI_PENDING_TEMPLATE_FLAG means the instantiation of this function template is still pending, i.e. it is not yet fully instantiated. template_for_substitution at line 2067 returns the declaration of this function template; if DECL_INLINE is 1 in it, the function should be inlined whenever possible. DECL_INLINE is set by the front-end case by case, and a nonzero DECL_DECLARED_INLINE_P indicates that the function declaration uses the keyword “inline”.

Note here that DECL_INLINE means “inline whenever possible”, while DECL_UNINLINABLE indicates that the function is uninlinable. Once DECL_UNINLINABLE is set, DECL_INLINE becomes meaningless.

Below, if flag_inline_trees is 0, the function won’t be inlined. By default, its value is 1.

 

inlinable_function_p (continue)

 

1171         /* If we don't have the function body available, we can't inline it.

1172           However, this should not be recorded since we also get here for

1173           forward declared inline functions. Therefore, return at once.  */

1174         if (!DECL_SAVED_TREE (fn))

1175           return false;

1176      

1177         /* If we're not inlining at all, then we cannot inline this function.  */

1178         else if (!flag_inline_trees )

1179           inlinable = false;

1180      

1181         /* Only try to inline functions if DECL_INLINE is set. This should be

1182           true for all functions declared `inline', and for all other functions

1183           as well with -finline-functions.

1184      

1185           Don't think of disregarding DECL_INLINE when flag_inline_trees == 2;

1186           it's the front-end that must set DECL_INLINE in this case, because

1187           dwarf2out loses if a function that does not have DECL_INLINE set is

1188           inlined anyway. That is why we have both DECL_INLINE and

1189           DECL_DECLARED_INLINE_P.  */

1190         /* FIXME: When flag_inline_trees dies, the check for flag_unit_at_a_time

1191           here should be redundant.  */

1192         else if (!DECL_INLINE (fn) && !flag_unit_at_a_time )

1193           inlinable = false;

1194      

1195        #ifdef INLINER_FOR_JAVA

1196         /* Synchronized methods can't be inlined. This is a bug.  */

1197         else if (METHOD_SYNCHRONIZED (fn))

1198           inlinable = false;

1199       #endif /* INLINER_FOR_JAVA */

1200      

1201   else if (inline_forbidden_p (fn))

1202   {

1203     /* See if we should warn about uninlinable functions. Previously,

1204       some of these warnings would be issued while trying to expand

1205        the function inline, but that would cause multiple warnings

1206        about functions that would for example call alloca. But since

1207        this a property of the function, just one warning is enough.

1208        As a bonus we can now give more details about the reason why a

1209        function is not inlinable.

1210        We only warn for functions declared `inline' by the user.  */

1211     bool do_warning = (warn_inline

1212                       && DECL_INLINE (fn)

1213                      && DECL_DECLARED_INLINE_P (fn)

1214                      && !DECL_IN_SYSTEM_HEADER (fn));

1215

1216     if (lookup_attribute ("always_inline",

1217                       DECL_ATTRIBUTES (fn)))

1218        sorry (inline_forbidden_reason , fn, fn);

1219     else if (do_warning)

1220       warning (inline_forbidden_reason , fn, fn);

1221

1222     inlinable = false;

1223   }

1224

1225   /* Squirrel away the result so that we don't have to check again.  */

1226   DECL_UNINLINABLE (fn) = !inlinable;

1227

1228   return inlinable;

1229 }

 

Even though the front-end and the hook consider the function qualified for inlining, it may still contain something unsuitable, which must be checked by the function below. If inlining isn’t permitted, it returns the offending tree node (a nonzero value); otherwise it returns NULL_TREE.

 

1141       static tree

1142       inline_forbidden_p (tree fndecl)                                                            in tree-inline.c

1143       {

1144         location_t saved_loc = input_location ;

1145         tree ret = walk_tree_without_duplicates

1146                     (&DECL_SAVED_TREE (fndecl), inline_forbidden_p_1 , fndecl);

1147         input_location = saved_loc;

1148         return ret;

1149       }

 

Similarly, walk_tree is used to traverse the function body. At line 1012 below, a nonzero result from alloca_call_p indicates that the function calls alloca (which allocates memory of the specified size on the stack; the memory is freed when the function returns), so unless the user demands it (via the attribute “always_inline”), this function is not inlined. Besides, if it calls setjmp, this function is also uninlinable. As seen previously, GCC has builtin functions that provide the intermediate tree form of the prototypes of basic library functions, so we can call them without the library’s header. A function that calls the builtins handling variable argument lists (va_start, stdarg_start, next_arg, va_end), or the builtin longjmp, can’t be inlined either.

 

996  static tree

997  inline_forbidden_p_1 (tree *nodep, int *walk_subtrees ATTRIBUTE_UNUSED,

998                      void *fnp)

999  {

1000   tree node = *nodep;

1001   tree fn = (tree) fnp;

1002   tree t;

1003

1004   switch (TREE_CODE (node))

1005   {

1006     case CALL_EXPR:

1007       /* Refuse to inline alloca call unless user explicitly forced so as

1008          this may change program's memory overhead drastically when the

1009          function using alloca is called in loop. In GCC present in

1010          SPEC2000 inlining into schedule_block cause it to require 2GB of

1011                RAM instead of 256MB.  */

1012       if (alloca_call_p (node)

1013           && !lookup_attribute ("always_inline", DECL_ATTRIBUTES (fn)))

1014       {

1015          inline_forbidden_reason

1016             = N_("%Jfunction '%F' can never be inlined because it uses "

1017                  "alloca (override using the always_inline attribute)");

1018          return node;

1019       }

1020       t = get_callee_fndecl (node);

1021       if (! t)

1022         break ;

1023

1024

1025       /* We cannot inline functions that call setjmp.  */

1026       if (setjmp_call_p (t))

1027       {

1028          inline_forbidden_reason

1029              = N_("%Jfunction '%F' can never be inlined because it uses setjmp");

1030          return node;

1031       }

1032

1033       if (DECL_BUILT_IN (t))

1034         switch (DECL_FUNCTION_CODE (t))

1035          {

1036            /* We cannot inline functions that take a variable number of

1037              arguments.  */

1038            case BUILT_IN_VA_START:

1039            case BUILT_IN_STDARG_START:

1040            case BUILT_IN_NEXT_ARG:

1041            case BUILT_IN_VA_END:

1042            {

1043              inline_forbidden_reason

1044                = N_("%Jfunction '%F' can never be inlined because it "

1045                      "uses variable argument lists");

1046               return node;

1047            }

1048            case BUILT_IN_LONGJMP:

1049            {

1050               /* We can't inline functions that call __builtin_longjmp at

1051                all. The non-local goto machinery really requires the

1052                destination be in a different function. If we allow the

1053                function calling __builtin_longjmp to be inlined into the

1054                function calling __builtin_setjmp, Things will Go Awry.  */

1055              /* ??? Need front end help to identify "regular" non-local

1056                goto.  */

1057              if (DECL_BUILT_IN_CLASS (t) == BUILT_IN_NORMAL)

1058             {

1059                inline_forbidden_reason

1060                    = N_("%Jfunction '%F' can never be inlined because "

1061                          "it uses setjmp-longjmp exception handling");

1062                return node;

1063             }

1064            }

1065

1066            default :

1067              break ;

1068           }

1069        break ;

1070

1071 #ifndef INLINER_FOR_JAVA

1072     case DECL_STMT:

1073       /* We cannot inline functions that contain other functions.  */

1074       if (TREE_CODE (TREE_OPERAND (node, 0)) == FUNCTION_DECL

1075           && DECL_INITIAL (TREE_OPERAND (node, 0)))

1076       {

1077          inline_forbidden_reason

1078             = N_("%Jfunction '%F' can never be inlined "

1079                 "because it contains a nested function");

1080          return node;

1081       }

1082       break ;

1083

1084     case GOTO_STMT:

1085     case GOTO_EXPR:

1086       t = TREE_OPERAND (node, 0);

1087

1088       /* We will not inline a function which uses computed goto. The

1089          addresses of its local labels, which may be tucked into

1090          global storage, are of course not constant across

1091          instantiations, which causes unexpected behavior.  */

1092       if (TREE_CODE (t) != LABEL_DECL)

1093       {

1094          inline_forbidden_reason

1095             = N_("%Jfunction '%F' can never be inlined "

1096                  "because it contains a computed goto");

1097          return node;

1098        }

1099

1100       /* We cannot inline a nested function that jumps to a nonlocal

1101         label.  */

1102       if (TREE_CODE (t) == LABEL_DECL && DECL_CONTEXT (t) != fn)

1103              {

1104                 inline_forbidden_reason

1105                     = N_("%Jfunction '%F' can never be inlined "

1106                          "because it contains a nonlocal goto");

1107                 return node;

1108              }

1109

1110       break ;

1111

1112     case RECORD_TYPE:

1113     case UNION_TYPE:

1114       /* We cannot inline a function of the form

1115

1116          void F (int i) { struct S { int ar[i]; } s; }

1117

1118         Attempting to do so produces a catch-22.

1119         If walk_tree examines the TYPE_FIELDS chain of RECORD_TYPE/

1120                UNION_TYPE nodes, then it goes into infinite recursion on a

1121                 structure containing a pointer to its own type. If it doesn't,

1122                then the type node for S doesn't get adjusted properly when

1123                F is inlined, and we abort in find_function_data.  */

1124       for (t = TYPE_FIELDS (node); t; t = TREE_CHAIN (t))

1125                if (variably_modified_type_p (TREE_TYPE (t)))

1126                 {

1127                   inline_forbidden_reason

1128                      = N_("%Jfunction '%F' can never be inlined "

1129                           "because it uses variable sized variables");

1130                   return node;

1131                 }

1132 #endif

1133     default :

1134       break ;

1135   }

1136

1137   return NULL_TREE;

1138 }

 

Besides the cases mentioned above, GNU provides an extension that allows defining nested functions; a function containing a nested function can’t be inlined either. Another extension supported by GNU is the computed goto expression, for example:

void *ptr;

/* ... */

ptr = &&foo;

The unary operator “&&” fetches the address of a label defined in the current function (“foo” in the example); the expression “goto *ptr;” is the computed goto expression. A function containing such an expression is not inlinable either.

GNU also has an extension that supports a class nested within a function whose fields have variably modified types; one form is the one shown in the comment at line 1116. A function containing it can’t be inlined either.

If the function passes the checks above, whether it is inlined is further constrained by its size, so next the number of instructions of the function needs to be estimated; this estimate will be used in later optimization.

 

5833 int

5834 c_estimate_num_insns (tree decl)                                                        in c-common.c

5835 {

5836    int num = 0;

5837    walk_tree_without_duplicates (&DECL_SAVED_TREE (decl), c_estimate_num_insns_1 , &num);

5838    return num;

5839 }

 

Below, MOVE_MAX_PIECES (defined as 4 for x86) is the number of bytes we can move efficiently at a time, as opposed to MOVE_MAX (16 for x86), which is the maximum number of bytes we can move with a single instruction. If a memory-to-memory move would take MOVE_RATIO or more simple move-instruction pairs, a movstr or libcall is used instead.

 

5716 static tree

5717 c_estimate_num_insns_1 (tree *tp, int *walk_subtrees, void *data)

5718 {

5719    int *count = data;

5720    tree x = *tp;

5721

5722    if (TYPE_P (x) || DECL_P (x))

5723    {

5724      *walk_subtrees = 0;

5725      return NULL;

5726    }

5727    /* Assume that constants and references counts nothing. These should

5728        be majorized by amount of operations among them we count later

5729      and are common target of CSE and similar optimizations.  */

5730    if (TREE_CODE_CLASS (TREE_CODE (x)) == 'c'

5731        || TREE_CODE_CLASS (TREE_CODE (x)) == 'r')

5732      return NULL;

5733    switch (TREE_CODE (x))

5734    {

5735      /* Recognize assignments of large structures and constructors of

5736        big arrays.  */

5737      case MODIFY_EXPR:

5738      case CONSTRUCTOR:

5739      {

5740        HOST_WIDE_INT size;

5741

5742        size = int_size_in_bytes (TREE_TYPE (x));

5743

5744        if (size < 0 || size > MOVE_MAX_PIECES * MOVE_RATIO)

5745           *count += 10;

5746        else

5747           *count += ((size + MOVE_MAX_PIECES - 1) / MOVE_MAX_PIECES);

5748         }

5749       break ;

5750       case CALL_EXPR:

5751       {

5752        tree decl = get_callee_fndecl (x);

5753

5754        if (decl && DECL_BUILT_IN (decl))

5755             switch (DECL_FUNCTION_CODE (decl))

5756            {

5757               case BUILT_IN_CONSTANT_P:

5758                 *walk_subtrees = 0;

5759                 return NULL_TREE;

5760             case BUILT_IN_EXPECT:

5761                return NULL_TREE;

5762               default :

5763                 break ;

5764           }

5765        *count += 10;

5766        break ;

5767        }

5768       /* Few special cases of expensive operations. This is usefull

5769          to avoid inlining on functions having too many of these.  */

5770        case TRUNC_DIV_EXPR:

5771        case CEIL_DIV_EXPR:

5772        case FLOOR_DIV_EXPR:

5773        case ROUND_DIV_EXPR:

5774        case TRUNC_MOD_EXPR:

5775       case CEIL_MOD_EXPR:

5776        case FLOOR_MOD_EXPR:

5777        case ROUND_MOD_EXPR:

5778       case RDIV_EXPR:

5779          *count += 10;

5780         break ;

5781       /* Various containers that will produce no code themselves.  */

5782        case INIT_EXPR:

5783        case TARGET_EXPR:

5784       case BIND_EXPR:

5785       case BLOCK:

5786       case TREE_LIST:

5787        case TREE_VEC:

5788        case IDENTIFIER_NODE:

5789       case PLACEHOLDER_EXPR:

5790        case WITH_CLEANUP_EXPR:

5791        case CLEANUP_POINT_EXPR:

5792       case NOP_EXPR:

5793       case VIEW_CONVERT_EXPR:

5794        case SAVE_EXPR:

5795        case UNSAVE_EXPR:

5796        case COMPLEX_EXPR:

5797        case REALPART_EXPR:

5798       case IMAGPART_EXPR:

5799       case TRY_CATCH_EXPR:

5800        case TRY_FINALLY_EXPR:

5801        case LABEL_EXPR:

5802       case EXIT_EXPR:

5803        case LABELED_BLOCK_EXPR:

5804        case EXIT_BLOCK_EXPR:

5805       case EXPR_WITH_FILE_LOCATION:

5806

5807       case EXPR_STMT:

5808        case COMPOUND_STMT:

5809        case RETURN_STMT:

5810       case LABEL_STMT:

5811       case SCOPE_STMT:

5812        case FILE_STMT:

5813        case CASE_LABEL:

5814       case STMT_EXPR:

5815        case CLEANUP_STMT:

5816

5817       case SIZEOF_EXPR:

5818        case ARROW_EXPR:

5819        case ALIGNOF_EXPR:

5820          break ;

5821        case DECL_STMT:

5822          /* Do not account static initializers.  */

5823          if (TREE_STATIC (TREE_OPERAND (x, 0)))

5824          *walk_subtrees = 0;

5825         break ;

5826        default :

5827          (*count)++;

5828    }

5829    return NULL;

5830 }

 

Next, at line 336 in cgraph_analyze_function, the hook disregard_inline_limits determines whether the function should be inlined even if its size exceeds the limit. Here, for C++, the hook is bound to lhd_tree_inlining_disregard_inline_limits; a function having the attribute “always_inline” belongs to this exception.

On returning to cgraph_finalize_compilation_unit from cgraph_analyze_function, the function is known to be called with certainty, so all of its callers are visited and set as reachable. At the same time, record_call_1 may have inserted objects into the queue cgraph_varpool_nodes_queue, so cgraph_varpool_assemble_pending_decls is invoked to emit assembly for these objects.

 

cgraph_finalize_compilation_unit (continue)

 

423    /* Collect entry points to the unit.  */

424 

425    if (cgraph_dump_file )

426    {

427      fprintf (cgraph_dump_file , "Unit entry points:");

428      for (node = cgraph_nodes ; node; node = node->next)

429        if (node->needed && DECL_SAVED_TREE (node->decl))

430      fprintf (cgraph_dump_file , " %s", cgraph_node_name (node));

431      fprintf (cgraph_dump_file , "\n\nInitial ");

432      dump_cgraph (cgraph_dump_file );

433    }

434 

435    if (cgraph_dump_file )

436      fprintf (cgraph_dump_file , "\nReclaiming functions:");

437 

438    for (node = cgraph_nodes ; node; node = node->next)

439    {

440      tree decl = node->decl;

441 

442      if (!node->reachable && DECL_SAVED_TREE (decl))

443      {

444        cgraph_remove_node (node);

445        if (cgraph_dump_file )

446          fprintf (cgraph_dump_file , " %s", cgraph_node_name (node));

447      }

448      else

449        node->next_needed = NULL;

450    }

451    if (cgraph_dump_file )

452    {

453      fprintf (cgraph_dump_file , "\n\nReclaimed ");

454      dump_cgraph (cgraph_dump_file );

455    }

456    ggc_collect ();

457    timevar_pop (TV_CGRAPH);

458  }

 

After traversing cgraph_nodes_queue, the functions left in the queue either have no body or aren’t invoked. At line 438, the latter are removed from the cgraph map.

 

你可能感兴趣的:(Studying note of GCC-3.4.6 source (174))