Studying note of GCC-3.4.6 source (49)

4.2.10.2.              Collect data

After creating the dummy function context, control returns to backend_init, which next calls init_expmed. init_expmed first calls start_sequence to prepare for rtl generation.

 

102  void

103  init_expmed (void)                                                                                           in expmed.c

104  {

105    rtx reg, shift_insn, shiftadd_insn, shiftsub_insn;

106    int dummy;

107    int m;

108    enum machine_mode mode, wider_mode;

109 

110     start_sequence ();

 

sequence_stack is the stack of pending (incomplete) sequences saved by start_sequence; each element describes one pending sequence. The main insn chain is saved in the last element of the stack, unless the stack is empty.

 

38      struct sequence_stack GTY(())                                                                   in function.h

39      {

40        /* First and last insns in the chain of the saved sequence.  */

41        rtx first;

42        rtx last;

43        tree sequence_rtl_expr;

44        struct sequence_stack *next;

45      };

 

In cfun all insns (rtx objects of instructions) are linked together in a list (the instructions forming the function body). Before emitting insns just to collect cost information, init_expmed needs to record the current boundary of that list, so that when it exits it can abandon the temporary insns with end_sequence.

 

4996 void

4997 start_sequence (void)                                                                                 in emit-rtl.c

4998 {

4999   struct sequence_stack *tem;

5000

5001   if (free_sequence_stack != NULL)

5002   {

5003     tem = free_sequence_stack;

5004     free_sequence_stack = tem->next;

5005   }

5006   else

5007     tem = ggc_alloc (sizeof (struct sequence_stack));

5008

5009   tem->next = seq_stack;

5010   tem->first = first_insn;

5011   tem->last = last_insn;

5012   tem->sequence_rtl_expr = seq_rtl_expr;

5013

5014   seq_stack = tem;

5015

5016   first_insn = 0;

5017   last_insn = 0;

5018 }

 

init_expmed then collects the costs of certain expressions by creating rtx objects for them. The first two expressions evaluated are (const 0) and reg (10000) + reg (10000).

 

init_expmed (continue)

 

112     /* This is "some random pseudo register" for purposes of calling recog

113       to see what insns exist.  */

114     reg = gen_rtx_REG (word_mode, 10000);

115  

116     zero_cost = rtx_cost (const0_rtx, 0);

117     add_cost = rtx_cost (gen_rtx_PLUS (word_mode, reg, reg), SET);

 

The statements at lines 116 and 117 above create two temporary rtx objects, as shown below. Note that word_mode is initialized in init_emit_once; for the x86 machine it is an alias of SImode.

Studying note of GCC-3.4.6 source (49)_第1张图片

figure 24: rtx object of const integer and PLUS with registers

 

819    int

820    rtx_cost (rtx x, enum rtx_code outer_code ATTRIBUTE_UNUSED)                    in cse.c

821    {

822      int i, j;

823      enum rtx_code code;

824      const char *fmt;

825      int total;

826   

827      if (x == 0)

828        return 0;

829   

830     /* Compute the default costs of certain things.

831        Note that targetm.rtx_costs can override the defaults.  */

832   

833      code = GET_CODE (x);

834      switch (code)

835      {

836        case MULT:

837          total = COSTS_N_INSNS (5);

838          break;

839        case DIV:

840        case UDIV:

841        case MOD:

842        case UMOD:

843          total = COSTS_N_INSNS (7);

844          break;

845        case USE:

846          /* Used in loop.c and combine.c as a marker.  */

847          total = 0;

848          break;

849        default:

850          total = COSTS_N_INSNS (1);

851      }

852   

853      switch (code)

854      {

855        case REG:

856          return 0;

857   

858        case SUBREG:

859          /* If we can't tie these modes, make this expensive. The larger

860            the mode, the more expensive it is.  */

861          if (! MODES_TIEABLE_P (GET_MODE (x), GET_MODE (SUBREG_REG (x))))

862            return COSTS_N_INSNS (2

863                             + GET_MODE_SIZE (GET_MODE (x)) / UNITS_PER_WORD);

864          break;

865   

866        default:

867          if ((*targetm.rtx_costs) (x, code, outer_code, &total))

868            return total;

869          break;

870      }

871   

872      /* Sum the costs of the sub-rtx's, plus cost of this operation,

873        which is already in total.  */

874   

875      fmt = GET_RTX_FORMAT (code);

876      for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)

877        if (fmt[i] == 'e')

878          total += rtx_cost (XEXP (x, i), code);

879        else if (fmt[i] == 'E')

880          for (j = 0; j < XVECLEN (x, i); j++)

881            total += rtx_cost (XVECEXP (x, i, j), code);

882   

883      return total;

884    }

 

Notice that in rtx_cost the costs of arithmetic operations are determined by rough, experience-based default values; however, in the second switch block, at line 866, the target can supply a function to provide more accurate data. For x86 this is ix86_rtx_costs.

 

15067 static bool

15068 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)                                          in i386.c

15069 {

15070   enum machine_mode mode = GET_MODE (x);

15071

15072   switch (code)

15073   {

15074     case CONST_INT:

15075     case CONST:

15076     case LABEL_REF:

15077     case SYMBOL_REF:

15078       if (TARGET_64BIT && !x86_64_sign_extended_value (x))

15079         *total = 3;

15080       else if (TARGET_64BIT && !x86_64_zero_extended_value (x))

15081         *total = 2;

15082       else if (flag_pic && SYMBOLIC_CONST (x)

15083        && (!TARGET_64BIT

15084        || (!GET_CODE (x) != LABEL_REF

15085            && (GET_CODE (x) != SYMBOL_REF

15086                || !SYMBOL_REF_LOCAL_P (x)))))

15087         *total = 1;

15088       else

15089         *total = 0;

15090       return true;

15091

15092     case CONST_DOUBLE:

15093       if (mode == VOIDmode)

15094         *total = 0;

15095       else

15096         switch (standard_80387_constant_p (x))

15097         {

15098           case 1: /* 0.0 */

15099             *total = 1;

15100             break;

15101           default: /* Other constants */

15102             *total = 2;

15103             break;

15104           case 0:

15105           case -1:

15106           /* Start with (MEM (SYMBOL_REF)), since that's where

15107             it'll probably end up. Add a penalty for size.  */

15108             *total = (COSTS_N_INSNS (1)

15109                   + (flag_pic != 0 && !TARGET_64BIT)

15110                   + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));

15111             break;

15112         }

15113       return true;

15114

15115     case ZERO_EXTEND:

15116       /* The zero extensions is often completely free on x86_64, so make

15117         it as cheap as possible.  */

15118       if (TARGET_64BIT && mode == DImode

15119           && GET_MODE (XEXP (x, 0)) == SImode)

15120         *total = 1;

15121       else if (TARGET_ZERO_EXTEND_WITH_AND)

15122         *total = COSTS_N_INSNS (ix86_cost->add);

15123       else

15124         *total = COSTS_N_INSNS (ix86_cost->movzx);

15125       return false;

15126

15127     case SIGN_EXTEND:

15128       *total = COSTS_N_INSNS (ix86_cost->movsx);

15129       return false;

15130

15131     case ASHIFT:

15132       if (GET_CODE (XEXP (x, 1)) == CONST_INT

15133           && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))

15134       {

15135         HOST_WIDE_INT value = INTVAL (XEXP (x, 1));

15136         if (value == 1)

15137         {

15138           *total = COSTS_N_INSNS (ix86_cost->add);

15139           return false;

15140         }

15141         if ((value == 2 || value == 3)

15142            && !TARGET_DECOMPOSE_LEA

15143            && ix86_cost->lea <= ix86_cost->shift_const)

15144         {

15145           *total = COSTS_N_INSNS (ix86_cost->lea);

15146           return false;

15147         }

15148 }

15149     /* FALLTHRU */

15150

15151     case ROTATE:

15152     case ASHIFTRT:

15153     case LSHIFTRT:

15154     case ROTATERT:

15155       if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)

15156       {

15157         if (GET_CODE (XEXP (x, 1)) == CONST_INT)

15158         {

15159           if (INTVAL (XEXP (x, 1)) > 32)

15160             *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);

15161           else

15162             *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);

15163         }

15164         else

15165         {

15166           if (GET_CODE (XEXP (x, 1)) == AND)

15167             *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);

15168           else

15169             *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);

15170         }

15171       }

15172       else

15173       {

15174         if (GET_CODE (XEXP (x, 1)) == CONST_INT)

15175           *total = COSTS_N_INSNS (ix86_cost->shift_const);

15176         else

15177           *total = COSTS_N_INSNS (ix86_cost->shift_var);

15178       }

15179       return false;

15180

15181     case MULT:

15182       if (FLOAT_MODE_P (mode))

15183         *total = COSTS_N_INSNS (ix86_cost->fmul);

15184       else if (GET_CODE (XEXP (x, 1)) == CONST_INT)

15185       {

15186         unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));

15187         int nbits;

15188

15189         for (nbits = 0; value != 0; value >>= 1)

15190           nbits++;

15191

15192         *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]

15193                      + nbits * ix86_cost->mult_bit);

15194       }

15195       else

15196       {

15197         /* This is arbitrary */

15198         *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]

15199                      + 7 * ix86_cost->mult_bit);

15200       }

15201       return false;

15202

15203     case DIV:

15204     case UDIV:

15205     case MOD:

15206     case UMOD:

15207       if (FLOAT_MODE_P (mode))

15208         *total = COSTS_N_INSNS (ix86_cost->fdiv);

15209       else

15210         *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);

15211       return false;

15212

15213     case PLUS:

15214       if (FLOAT_MODE_P (mode))

15215         *total = COSTS_N_INSNS (ix86_cost->fadd);

15216       else if (!TARGET_DECOMPOSE_LEA

15217        && GET_MODE_CLASS (mode) == MODE_INT

15218        && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))

15219       {

15220         if (GET_CODE (XEXP (x, 0)) == PLUS

15221             && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT

15222             && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT

15223             && CONSTANT_P (XEXP (x, 1)))

15224         {

15225           HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));

15226           if (val == 2 || val == 4 || val == 8)

15227           {

15228             *total = COSTS_N_INSNS (ix86_cost->lea);

15229             *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);

15230             *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),

15231                            outer_code);

15232             *total += rtx_cost (XEXP (x, 1), outer_code);

15233             return true;

15234           }

15235         }

15236         else if (GET_CODE (XEXP (x, 0)) == MULT

15237             && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)

15238         {

15239           HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));

15240           if (val == 2 || val == 4 || val == 8)

15241           {

15242             *total = COSTS_N_INSNS (ix86_cost->lea);

15243             *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);

15244             *total += rtx_cost (XEXP (x, 1), outer_code);

15245             return true;

15246           }

15247         }

15248         else if (GET_CODE (XEXP (x, 0)) == PLUS)

15249         {

15250           *total = COSTS_N_INSNS (ix86_cost->lea);

15251           *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);

15252           *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);

15253           *total += rtx_cost (XEXP (x, 1), outer_code);

15254           return true;

15255         }

15256       }

15257       /* FALLTHRU */

15258

15259     case MINUS:

15260       if (FLOAT_MODE_P (mode))

15261       {

15262         *total = COSTS_N_INSNS (ix86_cost->fadd);

15263         return false;

15264       }

15265       /* FALLTHRU */

15266

15267     case AND:

15268     case IOR:

15269     case XOR:

15270       if (!TARGET_64BIT && mode == DImode)

15271       {

15272         *total = (COSTS_N_INSNS (ix86_cost->add) * 2

15273                + (rtx_cost (XEXP (x, 0), outer_code)

15274                << (GET_MODE (XEXP (x, 0)) != DImode))

15275                + (rtx_cost (XEXP (x, 1), outer_code)

15276                << (GET_MODE (XEXP (x, 1)) != DImode)));

15277         return true;

15278       }

15279       /* FALLTHRU */

15280

15281     case NEG:

15282       if (FLOAT_MODE_P (mode))

15283       {

15284         *total = COSTS_N_INSNS (ix86_cost->fchs);

15285         return false;

15286       }

15287       /* FALLTHRU */

15288

15289     case NOT:

15290       if (!TARGET_64BIT && mode == DImode)

15291         *total = COSTS_N_INSNS (ix86_cost->add * 2);

15292       else

15293         *total = COSTS_N_INSNS (ix86_cost->add);

15294       return false;

15295

15296     case FLOAT_EXTEND:

15297       if (!TARGET_SSE_MATH

15298           || mode == XFmode

15299           || (mode == DFmode && !TARGET_SSE2))

15300         *total = 0;

15301         return false;

15302

15303     case ABS:

15304       if (FLOAT_MODE_P (mode))

15305         *total = COSTS_N_INSNS (ix86_cost->fabs);

15306       return false;

15307

15308     case SQRT:

15309       if (FLOAT_MODE_P (mode))

15310         *total = COSTS_N_INSNS (ix86_cost->fsqrt);

15311       return false;

15312

15313     case UNSPEC:

15314       if (XINT (x, 1) == UNSPEC_TP)

15315         *total = 0;

15316       return false;

15317

15318     default:

15319       return false;

15320   }

15321 }

 

For constant 0, rtx_cost calls ix86_rtx_costs at line 867; in ix86_rtx_costs, for a 32-bit x86 system, *total is set to 0 at line 15089 and the function returns true at line 15090. rtx_cost then returns at line 868, and zero_cost is assigned 0.

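For reg (10000) + reg (10000), rtx_cost also calls ix86_rtx_costs at line 867, which lands in case PLUS at line 15213. The mode is an integer mode, so the FLOAT_MODE_P test at line 15214 fails; and whether or not the lea branch at line 15216 is entered, none of its patterns match because both operands are plain registers. Control therefore falls through the MINUS, AND/IOR/XOR and NEG cases down to case NOT, where total is set at line 15293 and false is returned at line 15294. ix86_cost is a predefined structure that records the costs of certain operations on a particular chip. For pentium, the add cost is 1, so total ends up as COSTS_N_INSNS (1) = 4. Note that because ix86_rtx_costs returns false, rtx_cost falls through to line 875. For the rtl code PLUS, the format is ‘ee’, which means both children can be expressions. For our expression here, the two children are registers, and rtx_cost returns 0 for each of them at line 856. In the end add_cost gets 4.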

init_expmed then continue to get cost information for shift, shift-add, and shift-minus expressions.

 

init_expmed (continue)

 

119     shift_insn = emit_insn (gen_rtx_SET (VOIDmode, reg,

120                              gen_rtx_ASHIFT (word_mode, reg,

121                                            const0_rtx)));

122 

123    shiftadd_insn

124      = emit_insn (gen_rtx_SET (VOIDmode, reg,

125                      gen_rtx_PLUS (word_mode,

126                                  gen_rtx_MULT (word_mode,

127                                              reg, const0_rtx),

128                                  reg)));

129 

130    shiftsub_insn

131      = emit_insn (gen_rtx_SET (VOIDmode, reg,

132                      gen_rtx_MINUS (word_mode,

133                                   gen_rtx_MULT (word_mode,

134                                               reg, const0_rtx),

135                                   reg)));

136 

137    init_recog ();

 

gen_rtx_SET, gen_rtx_ASHIFT, gen_rtx_PLUS, gen_rtx_MULT and gen_rtx_MINUS, as we would expect, all call gen_rtx_fmt_ee – the rtx object created has two children, both of which can be expressions. For the code above, the following rtx objects will be created.

Studying note of GCC-3.4.6 source (49)_第2张图片

 

figure 25: rtx object for shift

figure 26: rtx object for shift-add

Studying note of GCC-3.4.6 source (49)_第3张图片

figure 27: rtx object for shift-sub

The rtx objects above are all patterns of instructions. The standard rtx object during compilation is the insn, the counterpart of an instruction, statement or expression in a high-level programming language. emit_insn creates an insn object from the rtx object that stands for its body. All insn objects are organized in a doubly linked list, in the order in which they are emitted.

 

4656 rtx

4657 emit_insn (rtx x)                                                                                       in emit-rtl.c

4658 {

4659   rtx last = last_insn;

4660   rtx insn;

4661

4662   if (x == NULL_RTX)

4663     return last;

4664

4665   switch (GET_CODE (x))

4666   {

4667     case INSN:

4668     case JUMP_INSN:

4669     case CALL_INSN:

4670     case CODE_LABEL:

4671     case BARRIER:

4672     case NOTE:

4673       insn = x;

4674       while (insn)

4675       {

4676         rtx next = NEXT_INSN (insn);

4677         add_insn (insn);

4678         last = insn;

4679         insn = next;

4680       }

4681       break;

4682

4683 #ifdef ENABLE_RTL_CHECKING

4684     case SEQUENCE:

4685       abort ();

4686       break;

4687 #endif

4688

4689     default:

4690       last = make_insn_raw (x);

4691       add_insn (last);

4692       break;

4693   }

4694

4695   return last;

4696 }

 

In our case, make_insn_raw will be invoked to create the insn object.

 

3459 rtx

3460 make_insn_raw (rtx pattern)                                                                       in emit-rtl.c

3461 {

3462   rtx insn;

3463

3464   insn = rtx_alloc (INSN);

3465

3466   INSN_UID (insn) = cur_insn_uid++;

3467   PATTERN (insn) = pattern;

3468   INSN_CODE (insn) = -1;

3469   LOG_LINKS (insn) = NULL;

3470   REG_NOTES (insn) = NULL;

3471   INSN_LOCATOR (insn) = 0;

3472   BLOCK_FOR_INSN (insn) = NULL;

3473

3474 #ifdef ENABLE_RTL_CHECKING

3475   if (insn

3476       && INSN_P (insn)

3477       && (returnjump_p (insn)

3478         || (GET_CODE (insn) == SET

3479           && SET_DEST (insn) == pc_rtx)))

3480   {

3481     warning ("ICE: emit_insn used where emit_jump_insn needed:\n");

3482     debug_rtx (insn);

3483   }

3484 #endif

3485

3486   return insn;

3487 }

 

And following are some macros used above, they all work on an insn object which also is an rtx.

 

561    /* Holds a unique number for each insn.

562      These are not necessarily sequentially increasing.  */

563    #define INSN_UID(INSN)  XINT (INSN, 0)                                                   in rtl.h

564   

565    /* Chain insns together in sequence.  */

566    #define PREV_INSN(INSN)     XEXP (INSN, 1)

567    #define NEXT_INSN(INSN)     XEXP (INSN, 2)

568   

569    #define BLOCK_FOR_INSN(INSN) XBBDEF (INSN, 3)

570    #define INSN_LOCATOR(INSN) XINT (INSN, 4)

571    /* The body of an insn.  */

572    #define PATTERN(INSN)   XEXP (INSN, 5)

573   

574    /* Code number of instruction, from when it was recognized.

575     -1 means this instruction has not been recognized yet.  */

576    #define INSN_CODE(INSN) XINT (INSN, 6)

577   

578    /* Set up in flow.c; empty before then.

579      Holds a chain of INSN_LIST rtx's whose first operands point at

580      previous insns with direct data-flow connections to this one.

581      That means that those insns set variables whose next use is in this insn.

582      They are always in the same basic block as this insn.  */

583    #define LOG_LINKS(INSN)     XEXP (INSN, 7)

584 

585    /* Holds a list of notes on what this insn does to various REGs.

586      It is a chain of EXPR_LIST rtx's, where the second operand is the

587      chain pointer and the first operand is the REG being described.

588      The mode field of the EXPR_LIST contains not a real machine mode

589      but a value from enum reg_note.  */

590   

591    #define REG_NOTES(INSN)    XEXP (INSN, 8)

 

At line 4691 in emit_insn, add_insn links the newly created insn object into the insn chain of the cfun object.

 

3534 void

3535 add_insn (rtx insn)                                                                                    in emit-rtl.c

3536 {

3537   PREV_INSN (insn) = last_insn;

3538   NEXT_INSN (insn) = 0;

3539

3540   if (NULL != last_insn)

3541     NEXT_INSN (last_insn) = insn;

3542

3543   if (NULL == first_insn)

3544     first_insn = insn;

3545

3546   last_insn = insn;

3547 }

 

From above code, we can see the insn object should look like following.

figure 28: rtx object of instruction

At line 137 of init_expmed above, init_recog just sets the global variable volatile_ok to 1; a nonzero value means operands are allowed to be volatile. init_expmed then goes on to collect the costs of the shift expressions, and after that it evaluates the negate, division and modulo expressions.

 

init_expmed (continue)

 

139    shift_cost[0] = 0;

140    shiftadd_cost[0] = shiftsub_cost[0] = add_cost;

141 

142    for (m = 1; m < MAX_BITS_PER_WORD; m++)

143    {

144      rtx c_int = GEN_INT ((HOST_WIDE_INT) 1 << m);

145      shift_cost[m] = shiftadd_cost[m] = shiftsub_cost[m] = 32000;

146 

147      XEXP (SET_SRC (PATTERN (shift_insn)), 1) = GEN_INT (m);

148      if (recog (PATTERN (shift_insn), shift_insn, &dummy) >= 0)

149        shift_cost[m] = rtx_cost (SET_SRC (PATTERN (shift_insn)), SET);

150 

151      XEXP (XEXP (SET_SRC (PATTERN (shiftadd_insn)), 0), 1) = c_int;

152      if (recog (PATTERN (shiftadd_insn), shiftadd_insn, &dummy) >= 0)

153        shiftadd_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftadd_insn)), SET);

154 

155      XEXP (XEXP (SET_SRC (PATTERN (shiftsub_insn)), 0), 1) = c_int;

156      if (recog (PATTERN (shiftsub_insn), shiftsub_insn, &dummy) >= 0)

157        shiftsub_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftsub_insn)), SET);

158    }

159 

160    negate_cost = rtx_cost (gen_rtx_NEG (word_mode, reg), SET);

161 

162    sdiv_pow2_cheap

163      = (rtx_cost (gen_rtx_DIV (word_mode, reg, GEN_INT (32)), SET)

164         <= 2 * add_cost);

165    smod_pow2_cheap

166      = (rtx_cost (gen_rtx_MOD (word_mode, reg, GEN_INT (32)), SET)

167         <= 2 * add_cost);

 

SET_SRC will do checking to ensure RTX_CODE is SET.

 

542    #define XCEXP(RTX, N, C) (RTL_CHECKC1 (RTX, N, C).rtx)                         in rtl.h

1245   #define SET_SRC(RTX) XCEXP(RTX, 1, SET)

 

Above, we have gotten that add_cost is 4 for 32 bit x86 system. For shift operation with operand 0, the net effect is doing nothing, so the cost should be 0. So the shift add/sub operation with shift operand of 0 should be just the same as the add_cost. That is the purpose of line 139~140.

recog above is generated from the machine description file (here i386.md) by the genrecog tool. The value returned by recog is the insn code; -1 means the insn is not recognized.

We can see that for an unrecognized insn the cost is set arbitrarily to 32000, a very high value. Remember that before recognition is attempted, the const_0 rtx object in each pattern (the rtx objects shown in the figures above) is replaced by the corresponding const_`m` rtx object; if the insn is then recognized, its cost is evaluated. Note that the object being evaluated is the second child (the source) of the rtx object with code SET.

Now resort to rtx_cost, we can get following information (takes pentium4 for example):

shift_cost [1] = add_cost = 4

shift_cost [2] = shift_cost [3] = lea cost = 4

shift_cost [4] … shift_cost [31] = constant shift cost = 16

shiftadd_cost [1] (with multiplicator = 1 << 1) = lea cost = 4

shiftadd_cost [2] (with multiplicator = 1 << 2) = lea cost = 4

shiftadd_cost [3] (with multiplicator = 1 << 3) = lea cost = 4

shiftadd_cost [with other multiplicator] = mult cost + add cost = 64

shiftsub_cost [n] = add cost + mult cost = 64

Above, line 160, following rtx object is created.

Studying note of GCC-3.4.6 source (49)_第4张图片

figure 29: rtx objects of NEG, DIV and MOD

And we can get that

negate_cost = add_cost = 4

div_cost = 224 > add_cost * 2, sdiv_pow2_cheap = false

mod_cost = 224 > add_cost * 2, smod_pow2_cheap = false

Following, as multiply and divide operation may have different cost for different mode, and because integer types are commonly used, we save these values into static variables.

 

init_expmed (continue)

 

169    for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);

170         mode != VOIDmode;

171         mode = GET_MODE_WIDER_MODE (mode))

172    {

173      reg = gen_rtx_REG (mode, 10000);

174      div_cost[(int) mode] = rtx_cost (gen_rtx_UDIV (mode, reg, reg), SET);

175      mul_cost[(int) mode] = rtx_cost (gen_rtx_MULT (mode, reg, reg), SET);

176      wider_mode = GET_MODE_WIDER_MODE (mode);

177      if (wider_mode != VOIDmode)

178      {

179        mul_widen_cost[(int) wider_mode]

180          = rtx_cost (gen_rtx_MULT (wider_mode,

181                             gen_rtx_ZERO_EXTEND (wider_mode, reg),

182                             gen_rtx_ZERO_EXTEND (wider_mode, reg)),

183                SET);

184        mul_highpart_cost[(int) mode]

185          = rtx_cost (gen_rtx_TRUNCATE

186                (mode,

187                gen_rtx_LSHIFTRT (wider_mode,

188                                 gen_rtx_MULT (wider_mode,

189                                            gen_rtx_ZERO_EXTEND

190                                            (wider_mode, reg),

191                                            gen_rtx_ZERO_EXTEND

192                                            (wider_mode, reg)),

193                                 GEN_INT (GET_MODE_BITSIZE (mode)))),

194                SET);

195      }

196   }

197 

198    end_sequence ();

199  }

 

Above, for pentium4 the div_cost and mul_cost are the same for all integer modes; they are all 224. At line 179, mul_widen_cost records the cost of a multiplication with mode promotion (a widening multiply). It is the result of evaluating the following rtx objects (taking SImode as an example).

Studying note of GCC-3.4.6 source (49)_第5张图片

figure 30: rtx object of widen MUL of SImode

For pentium4, the mul_widen_cost [mode] values are all 232; mul_highpart_cost is 264 for SImode and 248 for the other modes. After finishing all these operations, init_expmed invokes end_sequence to restore the previously saved state.

你可能感兴趣的:(Studying note of GCC-3.4.6 source (49))