Studying note of GCC-3.4.6 source (177)

5.13.5.3.2.2.2.          Function demands inline

Below, order is the queue of cgraph_node sorted in invocation order. The visit to the sorted nodes begins with the functions at the bottom of the call stack (as seen in the previous section, if there is a cycle, the functions forming it are put at the tail of the queue); that is, treatment begins with the last invoked function. Once a function is determined to be not inlinable, its caller is not inlinable either (the same holds for a cycle, which is why, when sorting, the functions of a cycle are put at the tail of the sorted queue: then none of their callers can be inlined either).

 

cgraph_decide_inlining (continue)

 

1271   /* In the first pass mark all always_inline edges. Do this with a priority

1272     so none of our later choices will make this impossible.  */

1273   for (i = nnodes - 1; i >= 0; i--)

1274   {

1275     struct cgraph_edge *e;

1276

1277     node = order[i];

1278

1279       for (e = node->callees; e; e = e->next_callee)

1280       if (e->callee->local.disregard_inline_limits)

1281          break ;

1282     if (!e)

1283       continue ;

1284     if (cgraph_dump_file )

1285       fprintf (cgraph_dump_file ,

1286              "\nConsidering %s %i insns (always inline)\n",

1287             cgraph_node_name (e->callee), e->callee->global.insns);

1288     ninlined = cgraph_inlined_into (order[i], inlined);

1289     for (; e; e = e->next_callee)

1290     {

1291        old_insns = overall_insns ;

1292        if (!e->inline_failed || !e->callee->local.inlinable

1293           || !e->callee->local.disregard_inline_limits)

1294             continue ;

1295         if (e->callee->output || e->callee == node)

1296          {

1297          e->inline_failed = N_("recursive inlining");

1298          continue ;

1299        }

1300        ninlined_callees =

1302               cgraph_inlined_callees (e->callee, inlined_callees);

1303        cgraph_mark_inline (node, e->callee, inlined, ninlined,

1304                         inlined_callees, ninlined_callees);

1305        for (y = 0; y < ninlined_callees; y++)

1306          inlined_callees[y]->output = 0, inlined_callees[y]->aux = 0;

1307        if (cgraph_dump_file )

1308          fprintf (cgraph_dump_file ,

1309                " Inlined into %s which now has %i insns.\n",

1310                cgraph_node_name (node->callees->caller),

1311                node->callees->caller->global.insns);

1312     }

1313     if (cgraph_dump_file && node->global.cloned_times > 0)

1314       fprintf (cgraph_dump_file ,

1315              " Inlined %i times for a net change of %+i insns.\n",

1316              node->global.cloned_times, overall_insns - old_insns);

1317     for (y = 0; y < ninlined; y++)

1318       inlined[y]->output = 0, inlined[y]->aux = 0;

1319   }

 

The loop at line 1279 walks the functions directly invoked by the current function, and the condition at line 1280 selects a callee that forcibly demands inlining (via the attribute “always_inline”). Around such an “always_inline” call, two sets of data are needed. The first is the set of all direct and indirect callers of the current function (order[i], see line 1288) whose invocations have already been determined to be inlined.

 

681  static int

682  cgraph_inlined_into (struct cgraph_node *node, struct cgraph_node **array) in cgraphunit.c

683  {

684    int nfound = 0;

685    struct cgraph_edge **stack;

686    struct cgraph_edge *e, *e1;

687    int sp;

688    int i;

689 

690    /* Fast path: since we traverse in mostly topological order, we will likely

691      find no edges.  */

692    for (e = node->callers; e; e = e->next_caller)

693      if (!e->inline_failed)

694        break ;

695 

696    if (!e)

697      return 0;

698 

699    /* Allocate stack for back-tracking up callgraph.  */

700    stack = xmalloc ((cgraph_n_nodes + 1) * sizeof (struct cgraph_edge));

701    sp = 0;

702 

703    /* Push the first edge on to the stack.  */

704    stack[sp++] = e;

705 

706    while (sp)

707    {

708      struct cgraph_node *caller;

709 

710       /* Look at the edge on the top of the stack.  */

711      e = stack[sp - 1];

712      caller = e->caller;

713 

714      /* Check if the caller destination has been visited yet.  */

715      if (!caller->output)

716      {

717        array[nfound++] = e->caller;

718        /* Mark that we have visited the destination.  */

719        caller->output = true;

720        SET_INLINED_TIMES (caller, 0);

721      }

722      SET_INLINED_TIMES (caller, INLINED_TIMES (caller) + 1);

723 

724      for (e1 = caller->callers; e1; e1 = e1->next_caller)

725        if (!e1->inline_failed)

726          break ;

727 

728      if (e1)

729        stack[sp++] = e1;

730      else

731      {

732        while (true)

733        {

734          for (e1 = e->next_caller; e1; e1 = e1->next_caller)

735            if (!e1->inline_failed)

736              break ;

737 

738          if (e1)

739          {

740            stack[sp - 1] = e1;

741            break ;

742          }

743          else

744          {

745            sp--;

746            if (!sp)

747               break ;

748            e = stack[sp - 1];

749          }

750        }

751      }

752    }

753 

754    free (stack);

755 

756 

757    if (cgraph_dump_file )

758    {

759      fprintf (cgraph_dump_file , " Found inline predecesors of %s:",

760            cgraph_node_name (node));

761      for (i = 0; i < nfound; i++)

762      {

763        fprintf (cgraph_dump_file , " %s", cgraph_node_name (array[i]));

764        if (INLINED_TIMES (array[i]) != 1)

765          fprintf (cgraph_dump_file , " (%i times)",

766                (int)INLINED_TIMES (array[i]));

767      }

768      fprintf (cgraph_dump_file , "\n");

769    }

770 

771    return nfound;

772  }

 

The traversal above is a depth-first one, done with the aid of an explicit stack. The first loop, at line 692, finds the first inlinable caller (note that in create_edge the field inline_failed of a fresh cgraph_edge is not empty, so a NULL value means this invocation is already known to be inlinable; whether the function itself is inlinable, however, depends on whether it can be inlined at all of its invocations). The edge to that caller is pushed onto the stack, and the iteration begins. The stack grows at line 729 when a further inlinable caller of the caller is found; the top element is overwritten at line 740 when all callers of the function have been visited and its siblings (the following next_caller edges) must be covered; and the stack shrinks at line 745 when the function together with its siblings is finished, so the traversal has to go back to the callee and visit its sibling.

Note SET_INLINED_TIMES at line 722: it accumulates the number of times the node would be inlined along the visited paths. Its definition is as below:

 

607  #define INLINED_TIMES(node) ((size_t)(node)->aux)                           in cgraphunit.c

608  #define SET_INLINED_TIMES(node,times) ((node)->aux = (void *)(times))

 

All such direct, indirect callers are included into array inlined in cgraph_decide_inlining (line 1288).

Note how cgraph_decide_inlining detects recursion before invoking cgraph_inlined_callees. Given that the cgraph_node's are in topological order, a recursion must appear as a direct or indirect path from the callee back to the caller, which is revealed either by encountering an already processed node (“e->callee->output” is true) or by a self invocation (condition “e->callee == node”); both are tested at line 1295. Such a call can’t be inlined, so it needs no further handling.

The other group of data is the set of direct and indirect callees whose invocations were previously determined to be inlined. They are recorded into array inlined_callees in cgraph_decide_inlining (line 1300).

 

717  static int

718  cgraph_inlined_callees (struct cgraph_node *node, struct cgraph_node **array)     in cgraphunit.c

719  {

720    int nfound = 0;

721    struct cgraph_edge **stack;

722    struct cgraph_edge *e, *e1;

723    int sp;

724    int i;

725 

726    /* Fast path: since we traverse in mostly topological order, we will likely

727      find no edges.  */

728    for (e = node->callees; e; e = e->next_callee)

729      if (!e->inline_failed)

730        break ;

731 

732    if (!e)

733      return 0;

734 

735    /* Allocate stack for back-tracking up callgraph.  */

736    stack = xmalloc ((cgraph_n_nodes + 1) * sizeof (struct cgraph_edge));

737    sp = 0;

738 

739    /* Push the first edge on to the stack.  */

740    stack[sp++] = e;

741 

742    while (sp)

743    {

744      struct cgraph_node *callee;

745 

746       /* Look at the edge on the top of the stack.  */

747      e = stack[sp - 1];

748      callee = e->callee;

749 

750      /* Check if the callee destination has been visited yet.  */

751      if (!callee->output)

752      {

753        array[nfound++] = e->callee;

754        /* Mark that we have visited the destination.  */

755        callee->output = true;

756        SET_INLINED_TIMES (callee, 0);

757      }

758      SET_INLINED_TIMES (callee, INLINED_TIMES (callee) + 1);

759 

760      for (e1 = callee->callees; e1; e1 = e1->next_callee)

761        if (!e1->inline_failed)

762          break ;

763      if (e1)

764        stack[sp++] = e1;

765      else

766      {

767        while (true)

768        {

769          for (e1 = e->next_callee; e1; e1 = e1->next_callee)

770            if (!e1->inline_failed)

771              break ;

772 

773          if (e1)

774          {

775            stack[sp - 1] = e1;

776            break ;

777          }

778          else

779          {

780            sp--;

781            if (!sp)

782               break ;

783            e = stack[sp - 1];

784          }

785        }

786      }

787    }

788 

789    free (stack);

790 

791    if (cgraph_dump_file )

792    {

793      fprintf (cgraph_dump_file , " Found inline successors of %s:",

794            cgraph_node_name (node));

795      for (i = 0; i < nfound; i++)

796      {

797        fprintf (cgraph_dump_file , " %s", cgraph_node_name (array[i]));

798        if (INLINED_TIMES (array[i]) != 1)

799          fprintf (cgraph_dump_file , " (%i times)",

800                (int)INLINED_TIMES (array[i]));

801      }

802      fprintf (cgraph_dump_file , "\n");

803    }

804 

805    return nfound;

806  }

 

This is also a depth-first traversal; the difference is that it follows the callees chain and the next_callee chain instead of the callers and next_caller chains.

Pay attention here to how cgraph_decide_inlining selects its candidates: at line 1292, condition “!e->inline_failed” means the invocation is already known to be inlinable, and condition “!e->callee->local.inlinable” indicates the function is not inlinable; line 1293 additionally skips callees that are not marked “always_inline”. For an inlinable function whose invocation’s inlinability isn’t known yet, on one side cgraph_inlined_callees collects the paths already known to be inlinable, and on the other side the function below marks this invocation as inlined and determines whether the function as a whole can be inlined.

 

951  static void

952  cgraph_mark_inline (struct cgraph_node *to, struct cgraph_node *what, in cgraphunit.c

953                   struct cgraph_node **inlined, int ninlined,

954                   struct cgraph_node **inlined_callees,

955                   int ninlined_callees)

956  {

957    int i;

958    int times = 0;

959    int clones = 0;

960    struct cgraph_edge *e;

961    bool called = false;

962    int new_insns;

963 

964    what->global.inlined = 1;

965    for (e = what->callers; e; e = e->next_caller)

966    {

967      if (e->caller == to)

968      {

969        if (!e->inline_failed)

970          continue ;

971        e->inline_failed = NULL;

972        times++;

973        clones += e->caller->global.cloned_times;

974      }

975      else if (e->inline_failed)

976        called = true;

977    }

978    if (!times)

979      abort ();

980    ncalls_inlined += times;

981 

982    new_insns = cgraph_estimate_size_after_inlining (times, to, what);

983    if (to->global.will_be_output)

984      overall_insns += new_insns - to->global.insns;

985    to->global.insns = new_insns;

986 

987    if (!called && !what->needed && !what->origin

988        && flag_unit_at_a_time

989        && !DECL_EXTERNAL (what->decl))

990    {

991      if (!what->global.will_be_output)

992        abort ();

993      clones--;

994      nfunctions_inlined ++;

995      what->global.will_be_output = 0;

996      overall_insns -= what->global.insns;

997    }

998    what->global.cloned_times += clones;

999    for (i = 0; i < ninlined; i++)

1000   {

1001     new_insns =

1002        cgraph_estimate_size_after_inlining (INLINED_TIMES (inlined[i]) *

1003                                     times, inlined[i], what);

1004     if (inlined[i]->global.will_be_output)

1005       overall_insns += new_insns - inlined[i]->global.insns;

1006     inlined[i]->global.insns = new_insns;

1007   }

1008   for (i = 0; i < ninlined_callees; i++)

1009   {

1010     inlined_callees[i]->global.cloned_times +=

1011          INLINED_TIMES (inlined_callees[i]) * clones;

1012   }

1013 }

 

Note that every cgraph_node entering the function above has its field “local.inlinable” set, because at the time this function is called the invocation under treatment is known to be inlinable. Recall that one cgraph_edge stands for one invocation: if one function invokes another function several times, one cgraph_edge is built per invocation, giving a one-to-one mapping. If every edge from to to what satisfies the condition at line 969 (that is, it has already been inlined), times remains 0; this means something is wrong in the compiler, and it aborts at line 979.

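Besides, the fact that one invocation is inlinable doesn’t mean the function is inlinable. The function is fully inlined only when all its invocations are inlined, which is the condition at line 987 (in which “!called” means no other caller still makes a non-inlined call, “!what->needed” indicates its address isn’t referenced, and “!what->origin” means it is not a nested function); in that case its own body no longer needs to be output (will_be_output is cleared at line 995). Otherwise the remaining invocations stay ordinary function calls instead of being inlined, and the function body is still output.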

 

909  static int

910  cgraph_estimate_size_after_inlining (int times, struct cgraph_node *to,    in cgraphunit.c

911                                struct cgraph_node *what)

912  {

913    return (what->global.insns - INSNS_PER_CALL) * times + to->global.insns;

914  }

 

The rest of cgraph_mark_inline updates the estimate of the total instruction count (the calculation of overall_insns), the estimated instruction count of every function the caller has already been inlined into (line 1006), and the clone count of every already inlined callee (line 1010). Note that the estimate of a caller’s instruction count at line 1006 doesn’t include the current function; however, treatment of that caller hasn’t begun yet, and when it is processed as the current node it will get a more accurate number.

 

你可能感兴趣的:(Studying note of GCC-3.4.6 source (177))