完成创建伪函数上下文的任务后,回到backend_init,跟着的是init_expmed。init_expmed首先调用start_sequence来为rtl的产生做准备。
102 void
103 init_expmed (void) in expmed.c
104 {
105 rtx reg, shift_insn, shiftadd_insn, shiftsub_insn;
106 int dummy;
107 int m;
108 enum machine_mode mode, wider_mode;
109
110 start_sequence ();
sequence_stack是由start_sequence保存的待定(未完成)指令序列形成的栈,其中每个栈元素描述了一个待定序列。主指令链保存在最后一个元素中,除非栈是空的。
38 struct sequence_stack GTY(()) in function.h
39 {
40 /* First and last insns in the chain of the saved sequence. */
41 rtx first;
42 rtx last;
43 tree sequence_rtl_expr;
44 struct sequence_stack *next;
45 };
在cfun中,所有insn(指令的rtx对象)被链成一个列表。在为收集代价信息而生成临时insn之前,需要先由start_sequence记录下这个边界;这样在退出时,我们就可以通过end_sequence来丢弃这些临时insn,并恢复原来的指令链(构成函数体的指令)。
4996 void
4997 start_sequence (void) in emit-rtl.c
4998 {
4999 struct sequence_stack *tem;
5000
5001 if (free_sequence_stack != NULL)
5002 {
5003 tem = free_sequence_stack;
5004 free_sequence_stack = tem->next;
5005 }
5006 else
5007 tem = ggc_alloc (sizeof (struct sequence_stack));
5008
5009 tem->next = seq_stack;
5010 tem->first = first_insn;
5011 tem->last = last_insn;
5012 tem->sequence_rtl_expr = seq_rtl_expr;
5013
5014 seq_stack = tem;
5015
5016 first_insn = 0;
5017 last_insn = 0;
5018 }
接着init_expmed从特定表达式收集创建rtx对象的代价。头2个被评估的表达式是(const 0)和reg (10000) + reg (10000)。
init_expmed (continue)
112 /* This is "some random pseudo register" for purposes of calling recog
113 to see what insns exist. */
114 reg = gen_rtx_REG (word_mode, 10000);
115
116 zero_cost = rtx_cost (const0_rtx, 0);
117 add_cost = rtx_cost (gen_rtx_PLUS (word_mode, reg, reg), SET);
上面116和117行的语句将产生如下的2个临时rtx对象。注意到word_mode在init_emit_once中初始化,对于x86机器,它是SImode的别名。
图24:整数及使用寄存器的PLUS表达式的rtx对象
819 int
820 rtx_cost (rtx x, enum rtx_code outer_code ATTRIBUTE_UNUSED) in cse.c
821 {
822 int i, j;
823 enum rtx_code code;
824 const char *fmt;
825 int total;
826
827 if (x == 0)
828 return 0;
829
830 /* Compute the default costs of certain things.
831 Note that targetm.rtx_costs can override the defaults. */
832
833 code = GET_CODE (x);
834 switch (code)
835 {
836 case MULT:
837 total = COSTS_N_INSNS (5);
838 break;
839 case DIV:
840 case UDIV:
841 case MOD:
842 case UMOD:
843 total = COSTS_N_INSNS (7);
844 break;
845 case USE:
846 /* Used in loop.c and combine.c as a marker. */
847 total = 0;
848 break;
849 default:
850 total = COSTS_N_INSNS (1);
851 }
852
853 switch (code)
854 {
855 case REG:
856 return 0;
857
858 case SUBREG:
859 /* If we can't tie these modes, make this expensive. The larger
860 the mode, the more expensive it is. */
861 if (! MODES_TIEABLE_P (GET_MODE (x), GET_MODE (SUBREG_REG (x))))
862 return COSTS_N_INSNS (2
863 + GET_MODE_SIZE (GET_MODE (x)) / UNITS_PER_WORD);
864 break;
865
866 default:
867 if ((*targetm.rtx_costs) (x, code, outer_code, &total))
868 return total;
869 break;
870 }
871
872 /* Sum the costs of the sub-rtx's, plus cost of this operation,
873 which is already in total. */
874
875 fmt = GET_RTX_FORMAT (code);
876 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
877 if (fmt[i] == 'e')
878 total += rtx_cost (XEXP (x, i), code);
879 else if (fmt[i] == 'E')
880 for (j = 0; j < XVECLEN (x, i); j++)
881 total += rtx_cost (XVECEXP (x, i, j), code);
882
883 return total;
884 }
注意到在rtx_cost中,算术操作的代价先根据表达式的编码用简单的默认数值粗略确定;不过在853行开始的第二个switch块的default分支(866行)中,目标机器可以通过指定的函数(targetm.rtx_costs)来给出更准确的数据。在这里这个函数是ix86_rtx_costs。
15067 static bool
15068 ix86_rtx_costs (rtx x, int code, int outer_code, int *total) in i386.c
15069 {
15070 enum machine_mode mode = GET_MODE (x);
15071
15072 switch (code)
15073 {
15074 case CONST_INT:
15075 case CONST:
15076 case LABEL_REF:
15077 case SYMBOL_REF:
15078 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
15079 *total = 3;
15080 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
15081 *total = 2;
15082 else if (flag_pic && SYMBOLIC_CONST (x)
15083 && (!TARGET_64BIT
15084 || (!GET_CODE (x) != LABEL_REF
15085 && (GET_CODE (x) != SYMBOL_REF
15086 || !SYMBOL_REF_LOCAL_P (x)))))
15087 *total = 1;
15088 else
15089 *total = 0;
15090 return true;
15091
15092 case CONST_DOUBLE:
15093 if (mode == VOIDmode)
15094 *total = 0;
15095 else
15096 switch (standard_80387_constant_p (x))
15097 {
15098 case 1: /* 0.0 */
15099 *total = 1;
15100 break;
15101 default: /* Other constants */
15102 *total = 2;
15103 break;
15104 case 0:
15105 case -1:
15106 /* Start with (MEM (SYMBOL_REF)), since that's where
15107 it'll probably end up. Add a penalty for size. */
15108 *total = (COSTS_N_INSNS (1)
15109 + (flag_pic != 0 && !TARGET_64BIT)
15110 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
15111 break;
15112 }
15113 return true;
15114
15115 case ZERO_EXTEND:
15116 /* The zero extensions is often completely free on x86_64, so make
15117 it as cheap as possible. */
15118 if (TARGET_64BIT && mode == DImode
15119 && GET_MODE (XEXP (x, 0)) == SImode)
15120 *total = 1;
15121 else if (TARGET_ZERO_EXTEND_WITH_AND)
15122 *total = COSTS_N_INSNS (ix86_cost->add);
15123 else
15124 *total = COSTS_N_INSNS (ix86_cost->movzx);
15125 return false;
15126
15127 case SIGN_EXTEND:
15128 *total = COSTS_N_INSNS (ix86_cost->movsx);
15129 return false;
15130
15131 case ASHIFT:
15132 if (GET_CODE (XEXP (x, 1)) == CONST_INT
15133 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
15134 {
15135 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15136 if (value == 1)
15137 {
15138 *total = COSTS_N_INSNS (ix86_cost->add);
15139 return false;
15140 }
15141 if ((value == 2 || value == 3)
15142 && !TARGET_DECOMPOSE_LEA
15143 && ix86_cost->lea <= ix86_cost->shift_const)
15144 {
15145 *total = COSTS_N_INSNS (ix86_cost->lea);
15146 return false;
15147 }
15148 }
15149 /* FALLTHRU */
15150
15151 case ROTATE:
15152 case ASHIFTRT:
15153 case LSHIFTRT:
15154 case ROTATERT:
15155 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
15156 {
15157 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15158 {
15159 if (INTVAL (XEXP (x, 1)) > 32)
15160 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
15161 else
15162 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
15163 }
15164 else
15165 {
15166 if (GET_CODE (XEXP (x, 1)) == AND)
15167 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
15168 else
15169 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
15170 }
15171 }
15172 else
15173 {
15174 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15175 *total = COSTS_N_INSNS (ix86_cost->shift_const);
15176 else
15177 *total = COSTS_N_INSNS (ix86_cost->shift_var);
15178 }
15179 return false;
15180
15181 case MULT:
15182 if (FLOAT_MODE_P (mode))
15183 *total = COSTS_N_INSNS (ix86_cost->fmul);
15184 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15185 {
15186 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15187 int nbits;
15188
15189 for (nbits = 0; value != 0; value >>= 1)
15190 nbits++;
15191
15192 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15193 + nbits * ix86_cost->mult_bit);
15194 }
15195 else
15196 {
15197 /* This is arbitrary */
15198 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15199 + 7 * ix86_cost->mult_bit);
15200 }
15201 return false;
15202
15203 case DIV:
15204 case UDIV:
15205 case MOD:
15206 case UMOD:
15207 if (FLOAT_MODE_P (mode))
15208 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15209 else
15210 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15211 return false;
15212
15213 case PLUS:
15214 if (FLOAT_MODE_P (mode))
15215 *total = COSTS_N_INSNS (ix86_cost->fadd);
15216 else if (!TARGET_DECOMPOSE_LEA
15217 && GET_MODE_CLASS (mode) == MODE_INT
15218 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15219 {
15220 if (GET_CODE (XEXP (x, 0)) == PLUS
15221 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15222 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15223 && CONSTANT_P (XEXP (x, 1)))
15224 {
15225 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15226 if (val == 2 || val == 4 || val == 8)
15227 {
15228 *total = COSTS_N_INSNS (ix86_cost->lea);
15229 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15230 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15231 outer_code);
15232 *total += rtx_cost (XEXP (x, 1), outer_code);
15233 return true;
15234 }
15235 }
15236 else if (GET_CODE (XEXP (x, 0)) == MULT
15237 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15238 {
15239 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15240 if (val == 2 || val == 4 || val == 8)
15241 {
15242 *total = COSTS_N_INSNS (ix86_cost->lea);
15243 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15244 *total += rtx_cost (XEXP (x, 1), outer_code);
15245 return true;
15246 }
15247 }
15248 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15249 {
15250 *total = COSTS_N_INSNS (ix86_cost->lea);
15251 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15252 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15253 *total += rtx_cost (XEXP (x, 1), outer_code);
15254 return true;
15255 }
15256 }
15257 /* FALLTHRU */
15258
15259 case MINUS:
15260 if (FLOAT_MODE_P (mode))
15261 {
15262 *total = COSTS_N_INSNS (ix86_cost->fadd);
15263 return false;
15264 }
15265 /* FALLTHRU */
15266
15267 case AND:
15268 case IOR:
15269 case XOR:
15270 if (!TARGET_64BIT && mode == DImode)
15271 {
15272 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15273 + (rtx_cost (XEXP (x, 0), outer_code)
15274 << (GET_MODE (XEXP (x, 0)) != DImode))
15275 + (rtx_cost (XEXP (x, 1), outer_code)
15276 << (GET_MODE (XEXP (x, 1)) != DImode)));
15277 return true;
15278 }
15279 /* FALLTHRU */
15280
15281 case NEG:
15282 if (FLOAT_MODE_P (mode))
15283 {
15284 *total = COSTS_N_INSNS (ix86_cost->fchs);
15285 return false;
15286 }
15287 /* FALLTHRU */
15288
15289 case NOT:
15290 if (!TARGET_64BIT && mode == DImode)
15291 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15292 else
15293 *total = COSTS_N_INSNS (ix86_cost->add);
15294 return false;
15295
15296 case FLOAT_EXTEND:
15297 if (!TARGET_SSE_MATH
15298 || mode == XFmode
15299 || (mode == DFmode && !TARGET_SSE2))
15300 *total = 0;
15301 return false;
15302
15303 case ABS:
15304 if (FLOAT_MODE_P (mode))
15305 *total = COSTS_N_INSNS (ix86_cost->fabs);
15306 return false;
15307
15308 case SQRT:
15309 if (FLOAT_MODE_P (mode))
15310 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15311 return false;
15312
15313 case UNSPEC:
15314 if (XINT (x, 1) == UNSPEC_TP)
15315 *total = 0;
15316 return false;
15317
15318 default:
15319 return false;
15320 }
15321 }
对于常量0,它在rtx_cost的867行处进入ix86_rtx_costs,并且在ix86_rtx_costs中,对于32位的x86系统,*total在15089行被设为0,随后在15090行返回true。然后rtx_cost在868行返回,并将zero_cost赋为0。
对于reg (10000) + reg (10000),它也是在rtx_cost的867行处进入ix86_rtx_costs,并进入15213行的PLUS分支。因为它不是浮点模式(15214行的条件不成立),15220~15248行的几种lea样式也都不匹配,所以一路往下(fall through)执行,直到15293行设置*total,并在15294行返回false。ix86_cost是预先定义的结构体,记录了特定芯片上特定操作的代价。例如pentium,加法的代价是1,经COSTS_N_INSNS换算后得到的代价为4。注意到由于ix86_rtx_costs返回的是false,rtx_cost里会继续执行到875行。对于rtl编码PLUS,其格式是ee,表明其2个孩子均为表达式。对于我们这里的表达式,这2个孩子都是寄存器,它们在855~856行返回值0。最后add_cost得到值4。
然后init_expmed继续获取关于移位(shift),移位加(shift-add)及移位减(shift-minus)表达式的代价信息。
init_expmed (continue)
119 shift_insn = emit_insn (gen_rtx_SET (VOIDmode, reg,
120 gen_rtx_ASHIFT (word_mode, reg,
121 const0_rtx)));
122
123 shiftadd_insn
124 = emit_insn (gen_rtx_SET (VOIDmode, reg,
125 gen_rtx_PLUS (word_mode,
126 gen_rtx_MULT (word_mode,
127 reg, const0_rtx),
128 reg)));
129
130 shiftsub_insn
131 = emit_insn (gen_rtx_SET (VOIDmode, reg,
132 gen_rtx_MINUS (word_mode,
133 gen_rtx_MULT (word_mode,
134 reg, const0_rtx),
135 reg)));
136
137 init_recog ();
gen_rtx_SET,gen_rtx_ASHIFT,gen_rtx_PLUS,gen_rtx_MULT,gen_rtx_MINUS,正如我们所期望的,都调用了gen_rtx_fmt_ee – 所创建的rtx对象具有2个表达式形式的孩子。对于上面的代码,以下对象将被创建。
图25:移位操作的rtx对象
图26:移位加操作的rtx对象
图27:移位减操作的rtx对象
上面的rtx对象都是指令的样式。在编译过程中标准的rtx对象是insn,它是高级编程语言中的指令,语句或表达式的对等物。emit_insn从rtx对象中创建insn对象来代表函数体。所有的insn对象都通过双向链表来组织,以在源代码中出现顺序排列。
4656 rtx
4657 emit_insn (rtx x) in emit-rtl.c
4658 {
4659 rtx last = last_insn;
4660 rtx insn;
4661
4662 if (x == NULL_RTX)
4663 return last;
4664
4665 switch (GET_CODE (x))
4666 {
4667 case INSN:
4668 case JUMP_INSN:
4669 case CALL_INSN:
4670 case CODE_LABEL:
4671 case BARRIER:
4672 case NOTE:
4673 insn = x;
4674 while (insn)
4675 {
4676 rtx next = NEXT_INSN (insn);
4677 add_insn (insn);
4678 last = insn;
4679 insn = next;
4680 }
4681 break;
4682
4683 #ifdef ENABLE_RTL_CHECKING
4684 case SEQUENCE:
4685 abort ();
4686 break;
4687 #endif
4688
4689 default:
4690 last = make_insn_raw (x);
4691 add_insn (last);
4692 break;
4693 }
4694
4695 return last;
4696 }
对于我们的案例,make_insn_raw被调用来创建insn对象。
3459 rtx
3460 make_insn_raw (rtx pattern) in emit-rtl.c
3461 {
3462 rtx insn;
3463
3464 insn = rtx_alloc (INSN);
3465
3466 INSN_UID (insn) = cur_insn_uid++;
3467 PATTERN (insn) = pattern;
3468 INSN_CODE (insn) = -1;
3469 LOG_LINKS (insn) = NULL;
3470 REG_NOTES (insn) = NULL;
3471 INSN_LOCATOR (insn) = 0;
3472 BLOCK_FOR_INSN (insn) = NULL;
3473
3474 #ifdef ENABLE_RTL_CHECKING
3475 if (insn
3476 && INSN_P (insn)
3477 && (returnjump_p (insn)
3478 || (GET_CODE (insn) == SET
3479 && SET_DEST (insn) == pc_rtx)))
3480 {
3481 warning ("ICE: emit_insn used where emit_jump_insn needed:/n");
3482 debug_rtx (insn);
3483 }
3484 #endif
3485
3486 return insn;
3487 }
下面是以上所用到的一些宏的定义,它们都用于insn对象上,注意到insn也是一个rtx。
561 /* Holds a unique number for each insn.
562 These are not necessarily sequentially increasing. */
563 #define INSN_UID(INSN) XINT (INSN, 0) in rtl.h
564
565 /* Chain insns together in sequence. */
566 #define PREV_INSN(INSN) XEXP (INSN, 1)
567 #define NEXT_INSN(INSN) XEXP (INSN, 2)
568
569 #define BLOCK_FOR_INSN(INSN) XBBDEF (INSN, 3)
570 #define INSN_LOCATOR(INSN) XINT (INSN, 4)
571 /* The body of an insn. */
572 #define PATTERN(INSN) XEXP (INSN, 5)
573
574 /* Code number of instruction, from when it was recognized.
575 -1 means this instruction has not been recognized yet. */
576 #define INSN_CODE(INSN) XINT (INSN, 6)
577
578 /* Set up in flow.c; empty before then.
579 Holds a chain of INSN_LIST rtx's whose first operands point at
580 previous insns with direct data-flow connections to this one.
581 That means that those insns set variables whose next use is in this insn.
582 They are always in the same basic block as this insn. */
583 #define LOG_LINKS(INSN) XEXP (INSN, 7)
584
585 /* Holds a list of notes on what this insn does to various REGs.
586 It is a chain of EXPR_LIST rtx's, where the second operand is the
587 chain pointer and the first operand is the REG being described.
588 The mode field of the EXPR_LIST contains not a real machine mode
589 but a value from enum reg_note. */
590
591 #define REG_NOTES(INSN) XEXP (INSN, 8)
在emit_insn的4691行,add_insn把所创建的insn对象链入cfun对象中。
3534 void
3535 add_insn (rtx insn) in emit-rtl.c
3536 {
3537 PREV_INSN (insn) = last_insn;
3538 NEXT_INSN (insn) = 0;
3539
3540 if (NULL != last_insn)
3541 NEXT_INSN (last_insn) = insn;
3542
3543 if (NULL == first_insn)
3544 first_insn = insn;
3545
3546 last_insn = insn;
3547 }
从上面的代码,可以看到insn对象应该看起来像下面那样。
图28:指令的rtx对象
在init_expmed的137行,init_recog仅把全局变量volatile_ok设为1,这个变量不为0表示允许操作数为volatile。然后init_expmed跟着来收集表达式的代价。在这以后,还需要评估取反,除法及取模的表达式。
init_expmed (continue)
139 shift_cost[0] = 0;
140 shiftadd_cost[0] = shiftsub_cost[0] = add_cost;
141
142 for (m = 1; m < MAX_BITS_PER_WORD; m++)
143 {
144 rtx c_int = GEN_INT ((HOST_WIDE_INT) 1 << m);
145 shift_cost[m] = shiftadd_cost[m] = shiftsub_cost[m] = 32000;
146
147 XEXP (SET_SRC (PATTERN (shift_insn)), 1) = GEN_INT (m);
148 if (recog (PATTERN (shift_insn), shift_insn, &dummy) >= 0)
149 shift_cost[m] = rtx_cost (SET_SRC (PATTERN (shift_insn)), SET);
150
151 XEXP (XEXP (SET_SRC (PATTERN (shiftadd_insn)), 0), 1) = c_int;
152 if (recog (PATTERN (shiftadd_insn), shiftadd_insn, &dummy) >= 0)
153 shiftadd_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftadd_insn)), SET);
154
155 XEXP (XEXP (SET_SRC (PATTERN (shiftsub_insn)), 0), 1) = c_int;
156 if (recog (PATTERN (shiftsub_insn), shiftsub_insn, &dummy) >= 0)
157 shiftsub_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftsub_insn)), SET);
158 }
159
160 negate_cost = rtx_cost (gen_rtx_NEG (word_mode, reg), SET);
161
162 sdiv_pow2_cheap
163 = (rtx_cost (gen_rtx_DIV (word_mode, reg, GEN_INT (32)), SET)
164 <= 2 * add_cost);
165 smod_pow2_cheap
166 = (rtx_cost (gen_rtx_MOD (word_mode, reg, GEN_INT (32)), SET)
167 <= 2 * add_cost);
SET_SRC执行检查以确保RTX_CODE是SET。
542 #define XCEXP(RTX, N, C) (RTL_CHECKC1 (RTX, N, C).rtx) in rtl.h
1245 #define SET_SRC(RTX) XCEXP(RTX, 1, SET)
在上面,对于32位x86系统,已经得到add_cost为4。对于移位为0的移位操作,实际上无事可做,因此其代价应该为0。因而具有0移位的移位加/减操作的代价应该等同于add_cost。这正是139~140行的目的。
上面的recog是由工具genrecog通过机器描述文件(这里是i386.md)生成的。recog的返回值是insn-code,如果为-1,则表明该insn不能被识别。
可以看到每个代价都先被设为32000,一个非常大的值;对于不能识别的insn,其代价就保持这个值。在每次迭代中,样式(上面图中所显示的rtx对象)里的rtx对象const_0先被替换:移位操作替换为常量m(147行),移位加/减操作替换为常量1 << m(151及155行);然后调用recog,如果insn被识别,就评估其代价。注意到被评估的东西是rtx对象SET的第二个孩子(即SET_SRC)。
在rtx_cost的帮助下,我们可以获得以下的信息(以pentium4为例):
shift_cost [1] = add_cost = 4
shift_cost [2] = shift_cost [3] = lea cost = 4
shift_cost [4] … shift_cost [31] = constant shift cost = 16
shiftadd_cost [1] (with multiplicator = 1 << 1) = lea cost = 4
shiftadd_cost [2] (with multiplicator = 1 << 2) = lea cost = 4
shiftadd_cost [3] (with multiplicator = 1 << 3) = lea cost = 4
shiftadd_cost [with other multiplicator] = mult cost + add cost = 64
shiftsub_cost [n] = add cost + mult cost = 64
在上面的160行,如下rtx对象被创建。
图29:NEG,DIV及MOD的rtx对象
并且又得到
negate_cost = add_cost = 4
div_cost = 224 > add_cost * 2, sdiv_pow2_cheap = false
mod_cost = 224 > add_cost * 2, smod_pow2_cheap = false
接下来,因为不同机器模式乘法和除法操作可能会有不同的代价,并且整数类型是最常用到的,我们把这些值保存到静态变量中。
init_expmed (continue)
169 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
170 mode != VOIDmode;
171 mode = GET_MODE_WIDER_MODE (mode))
172 {
173 reg = gen_rtx_REG (mode, 10000);
174 div_cost[(int) mode] = rtx_cost (gen_rtx_UDIV (mode, reg, reg), SET);
175 mul_cost[(int) mode] = rtx_cost (gen_rtx_MULT (mode, reg, reg), SET);
176 wider_mode = GET_MODE_WIDER_MODE (mode);
177 if (wider_mode != VOIDmode)
178 {
179 mul_widen_cost[(int) wider_mode]
180 = rtx_cost (gen_rtx_MULT (wider_mode,
181 gen_rtx_ZERO_EXTEND (wider_mode, reg),
182 gen_rtx_ZERO_EXTEND (wider_mode, reg)),
183 SET);
184 mul_highpart_cost[(int) mode]
185 = rtx_cost (gen_rtx_TRUNCATE
186 (mode,
187 gen_rtx_LSHIFTRT (wider_mode,
188 gen_rtx_MULT (wider_mode,
189 gen_rtx_ZERO_EXTEND
190 (wider_mode, reg),
191 gen_rtx_ZERO_EXTEND
192 (wider_mode, reg)),
193 GEN_INT (GET_MODE_BITSIZE (mode)))),
194 SET);
195 }
196 }
197
198 end_sequence ();
199 }
上面,对于pentium4,div_cost及mul_cost对于所有的整型模式都是相同的,它们都是224。179行,mul_widen_cost记录了有模式提升(mode promotion)的乘法操作的代价。它们是对以下rtx对象评估的结果(以SImode为例)。
图30:SImode的乘法的rtx对象
对于pentium4,我们得到mul_widen_cost [mode]都是232,mul_highpart_cost,对于SImode是264,其余为248。在完成所有这些操作后,init_expmed调用end_sequence来恢复之前保存的状态。