Back in init_regs, the next call is to init_reg_autoinc. For the x86 machine, this function will set forbidden_inc_dec_class for all registers, as x86 doesn’t have any sort of auto-increment addressing. In init_reg_autoinc below, FORBIDDEN_INC_DEC_CLASSES is defined if either macro SECONDARY_INPUT_RELOAD_CLASS or SECONDARY_OUTPUT_RELOAD_CLASS is defined. For the x86 machine, SECONDARY_OUTPUT_RELOAD_CLASS is defined as:
1560 #define SECONDARY_OUTPUT_RELOAD_CLASS(CLASS, MODE, OUT) \
1561 (((CLASS) == GENERAL_REGS || (CLASS) == LEGACY_REGS \
1562 || (CLASS) == INDEX_REGS) && !TARGET_64BIT && (MODE) == QImode \
1563 ? Q_REGS : NO_REGS)
The meaning of these macros is given in [2], quoted below.
-- Macro: SECONDARY_RELOAD_CLASS (CLASS, MODE, X) -- Macro: SECONDARY_INPUT_RELOAD_CLASS (CLASS, MODE, X) -- Macro: SECONDARY_OUTPUT_RELOAD_CLASS (CLASS, MODE, X) Many machines have some registers that cannot be copied directly to or from memory or even from other types of registers. An example is the `MQ' register, which on most machines, can only be copied to or from general registers, but not memory. Some machines allow copying all registers to and from memory, but require a scratch register for stores to some memory locations (e.g., those with symbolic address on the RT, and those with certain symbolic address on the SPARC when compiling PIC). In some cases, both an intermediate and a scratch register are required. You should define these macros to indicate to the reload phase that it may need to allocate at least one register for a reload in addition to the register to contain the data. Specifically, if copying X to a register CLASS in MODE requires an intermediate register, you should define SECONDARY_INPUT_RELOAD_CLASS to return the largest register class all of whose registers can be used as intermediate registers or scratch registers. If copying a register CLASS in MODE to X requires an intermediate or scratch register, SECONDARY_OUTPUT_RELOAD_CLASS should be defined to return the largest register class required. If the requirements for input and output reloads are the same, the macro SECONDARY_RELOAD_CLASS should be used instead of defining both macros identically. The values returned by these macros are often GENERAL_REGS. Return NO_REGS if no spare register is needed; i.e., if X can be directly copied to or from a register of CLASS in MODE without requiring a scratch register. Do not define this macro if it would always return NO_REGS. |
* the reload pass of the compiler is run after register allocation has been done. It checks that each insn (instruction of intermediate form) is valid (operands required to be in registers really are in registers of the proper class) and fixes up invalid ones by copying values temporarily into registers for the insns that need them.
1133 static void
1134 init_reg_autoinc (void) in regclass.c
1135 {
1136 #ifdef FORBIDDEN_INC_DEC_CLASSES
1137 int i;
1138
1139 for (i = 0; i < N_REG_CLASSES; i++)
1140 {
1141 rtx r = gen_rtx_raw_REG (VOIDmode, 0);
1142 enum machine_mode m;
1143 int j;
1144
1145 for (j = 0; j < FIRST_PSEUDO_REGISTER; j++)
1146 if (TEST_HARD_REG_BIT (reg_class_contents[i], j))
1147 {
1148 REGNO (r) = j;
1149
1150 for (m = VOIDmode; (int) m < (int) MAX_MACHINE_MODE;
1151 m = (enum machine_mode) ((int) m + 1))
1152 if (HARD_REGNO_MODE_OK (j, m))
1153 {
1154 PUT_MODE (r, m);
1155
1156 /* If a register is not directly suitable for an
1157 auto-increment or decrement addressing mode and
1158 requires secondary reloads, disallow its class from
1159 being used in such addresses. */
1160
1161 if ((0
1162 #ifdef SECONDARY_RELOAD_CLASS
1163 || (SECONDARY_RELOAD_CLASS (MODE_BASE_REG_CLASS (VOIDmode), m, r)
1164 != NO_REGS)
1165 #else
1166 #ifdef SECONDARY_INPUT_RELOAD_CLASS
1167 || (SECONDARY_INPUT_RELOAD_CLASS (MODE_BASE_REG_CLASS (VOIDmode), m, r)
1168 != NO_REGS)
1169 #endif
1170 #ifdef SECONDARY_OUTPUT_RELOAD_CLASS
1171 || (SECONDARY_OUTPUT_RELOAD_CLASS (MODE_BASE_REG_CLASS (VOIDmode), m, r)
1172 != NO_REGS)
1173 #endif
1174 #endif
1175 )
1176 && ! auto_inc_dec_reg_p (r, m))
1177 forbidden_inc_dec_class[i] = 1;
1178 }
1179 }
1180 }
1181 #endif /* FORBIDDEN_INC_DEC_CLASSES */
1182 }
MODE_BASE_REG_CLASS above always returns GENERAL_REGS for the x86 machine. In auto_inc_dec_reg_p below, the HAVE_* macros are defined as 0 by default, so the function always returns 0 for the x86 machine too.
2106 static int
2107 auto_inc_dec_reg_p (rtx reg, enum machine_mode mode) in regclass.c
2108 {
2109 if (HAVE_POST_INCREMENT
2110 && memory_address_p (mode, gen_rtx_POST_INC (Pmode, reg)))
2111 return 1;
2112
2113 if (HAVE_POST_DECREMENT
2114 && memory_address_p (mode, gen_rtx_POST_DEC (Pmode, reg)))
2115 return 1;
2116
2117 if (HAVE_PRE_INCREMENT
2118 && memory_address_p (mode, gen_rtx_PRE_INC (Pmode, reg)))
2119 return 1;
2120
2121 if (HAVE_PRE_DECREMENT
2122 && memory_address_p (mode, gen_rtx_PRE_DEC (Pmode, reg)))
2123 return 1;
2124
2125 return 0;
2126 }
Back in backend_init, after initializing the register set, it calls init_fake_stack_mems, which initializes some fake stack-frame memory references for use in memory_move_secondary_cost for testing purposes.
574 void
575 init_fake_stack_mems (void) in regclass.c
576 {
577 #ifdef HAVE_SECONDARY_RELOADS
578 {
579 int i;
580
581 for (i = 0; i < MAX_MACHINE_MODE; i++)
582 top_of_stack[i] = gen_rtx_MEM (i, stack_pointer_rtx);
583 }
584 #endif
585 }
gen_rtx_MEM has the following definition, which generates an rtx object for a memory reference.
600 rtx
601 gen_rtx_MEM (enum machine_mode mode, rtx addr) in emit-rtl.c
602 {
603 rtx rt = gen_rtx_raw_MEM (mode, addr);
604
605 /* This field is not cleared by the mere allocation of the rtx, so
606 we clear it here. */
607 MEM_ATTRS (rt) = 0;
608
609 return rt;
610 }
gen_rtx_raw_MEM is generated by the gengenrtl tool from rtl.def, and it lives in the generated file genrtl.h. gen_rtx_raw_MEM invokes gen_rtx_fmt_e0 to create the rtx object. The name of that function indicates that two children are present: one is an rtx expression, the other is just NULL_RTX.
The MEM_ATTRS macro below then replaces the second child of the rtx object with the memory attributes, which at this point are also null.
1163 #define MEM_ATTRS(RTX) X0MEMATTR (RTX, 1) in rtl.h
533 #define X0MEMATTR(RTX, N) (RTL_CHECKC1 (RTX, N, MEM).rtmem) in rtl.h
Next, backend_init invokes init_alias_once, which sets up the data structures needed for alias analysis during compilation.
backend_init (continue)
4509 init_alias_once ();
4510 init_loop ();
4511 init_reload ();
4512 init_function_once ();
4513 init_varasm_once ();
4514
4515 /* The following initialization functions need to generate rtl, so
4516 provide a dummy function context for them. */
4517 init_dummy_function_start ();
4518 init_expmed ();
4519 if (flag_caller_saves)
4520 init_caller_save ();
4521 expand_dummy_function_end ();
4522 }
The function creates static_reg_base_value, which contains the registers that can hold an address passed as a parameter (a pointer) and those with a special purpose (i.e. stack pointer, arg pointer, frame pointer – necessary for function invocation).
2736 void
2737 init_alias_once (void) in alias.c
2738 {
2739 int i;
2740
2741 #ifndef OUTGOING_REGNO
2742 #define OUTGOING_REGNO(N) N
2743 #endif
2744 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
2745 /* Check whether this register can hold an incoming pointer
2746 argument. FUNCTION_ARG_REGNO_P tests outgoing register
2747 numbers, so translate if necessary due to register windows. */
2748 if (FUNCTION_ARG_REGNO_P (OUTGOING_REGNO (i))
2749 && HARD_REGNO_MODE_OK (i, Pmode))
2750 static_reg_base_value[i]
2751 = gen_rtx_ADDRESS (VOIDmode, gen_rtx_REG (Pmode, i));
2752
2753 static_reg_base_value[STACK_POINTER_REGNUM]
2754 = gen_rtx_ADDRESS (Pmode, stack_pointer_rtx);
2755 static_reg_base_value[ARG_POINTER_REGNUM]
2756 = gen_rtx_ADDRESS (Pmode, arg_pointer_rtx);
2757 static_reg_base_value[FRAME_POINTER_REGNUM]
2758 = gen_rtx_ADDRESS (Pmode, frame_pointer_rtx);
2759 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2760 static_reg_base_value[HARD_FRAME_POINTER_REGNUM]
2761 = gen_rtx_ADDRESS (Pmode, hard_frame_pointer_rtx);
2762 #endif
2763 }
Above, OUTGOING_REGNO is defined specially for machines incorporating register windows. The basic idea of register windows is to provide a very large set of physical registers, most of which are organized as a collection of overlapping windows. Taking Sparc as an example, a few register names (r0 ~ r7) always refer to the same locations and are of global usage, but the rest (r8 ~ r31) are interpreted relative to the currently active window. On a subroutine call, the hardware moves to a different window. To facilitate the passing of parameters, the old and new windows overlap: the top few registers in the caller’s window (r24 ~ r31) are the same as the bottom few registers in the callee’s window (r8 ~ r15). The compiler places values of use only within the current subroutine in the middle part of the window. In practice, eight windows appear to be sufficient for typical programs, and they are treated as a circular buffer. Register windows were originally designed for the Berkeley RISC machine. For more detail about register windows, please refer to p240 of [1].
In the GCC source, we can see that IA64, Sparc and Xtensa have register windows. OUTGOING_REGNO (IN) should return the register number as seen by the calling function corresponding to the register number IN as seen by the called function. For x86 machines, the macro OUTGOING_REGNO (i) is just i (defined at line 2742 above), which means register number i is not an inbound register.
figure 19: register windows
At line 2748, FUNCTION_ARG_REGNO_P checks whether a given register can be used for passing function parameters. For the x86 machine, we get:
1744 /* 1 if N is a possible register number for function argument passing. */
1745 #define FUNCTION_ARG_REGNO_P(N) ix86_function_arg_regno_p (N) in i386.h
1825 bool
1826 ix86_function_arg_regno_p (int regno) in i386.c
1827 {
1828 int i;
1829 if (!TARGET_64BIT)
1830 return (regno < REGPARM_MAX
1831 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
...
1841 }
REGPARM_MAX defines the maximum number of parameters that can be passed via registers. For the x86 machine it is defined as below.
2551 #define REGPARM_MAX (TARGET_64BIT ? 6 : 3) in i386.h
GCC defines a global integer variable target_flags. It is a set of bit flags that specify the machine subtype we are compiling for. Bits are tested using macros TARGET_* defined in the target machine.h file and set by -m* switches. TARGET_SSE above checks whether the machine supports the SSE instruction set.
From the function, we can see that for the 32-bit ABI, the x86 machine can use ax, dx, cx, and xmm0 ~ xmm7 to pass parameters to a function. If a register can be used to pass function arguments, and it is compatible with Pmode (i.e. it can hold an address for the target machine), a unique address rtx object must be created for it in static_reg_base_value, which will represent the base address for indirect memory access.
535 rtx
536 gen_rtx_REG (enum machine_mode mode, unsigned int regno) in emit-rtl.c
537 {
538 /* In case the MD file explicitly references the frame pointer, have
539 all such references point to the same frame pointer. This is
540 used during frame pointer elimination to distinguish the explicit
541 references to these registers from pseudos that happened to be
542 assigned to them.
543
544 If we have eliminated the frame pointer or arg pointer, we will
545 be using it as a normal register, for example as a spill
546 register. In such cases, we might be accessing it in a mode that
547 is not Pmode and therefore cannot use the pre-allocated rtx.
548
549 Also don't do this when we are making new REGs in reload, since
550 we don't want to get confused with the real pointers. */
551
552 if (mode == Pmode && !reload_in_progress)
553 {
554 if (regno == FRAME_POINTER_REGNUM
555 && (!reload_completed || frame_pointer_needed))
556 return frame_pointer_rtx;
557 #if FRAME_POINTER_REGNUM != HARD_FRAME_POINTER_REGNUM
558 if (regno == HARD_FRAME_POINTER_REGNUM
559 && (!reload_completed || frame_pointer_needed))
560 return hard_frame_pointer_rtx;
561 #endif
562 #if FRAME_POINTER_REGNUM != ARG_POINTER_REGNUM && HARD_FRAME_POINTER_REGNUM != ARG_POINTER_REGNUM
563 if (regno == ARG_POINTER_REGNUM)
564 return arg_pointer_rtx;
565 #endif
566 #ifdef RETURN_ADDRESS_POINTER_REGNUM
567 if (regno == RETURN_ADDRESS_POINTER_REGNUM)
568 return return_address_pointer_rtx;
569 #endif
570 if (regno == (unsigned) PIC_OFFSET_TABLE_REGNUM
571 && fixed_regs[PIC_OFFSET_TABLE_REGNUM])
572 return pic_offset_table_rtx;
573 if (regno == STACK_POINTER_REGNUM)
574 return stack_pointer_rtx;
575 }
576
577 #if 0
578 /* If the per-function register table has been set up, try to re-use
579 an existing entry in that table to avoid useless generation of RTL.
580
581 This code is disabled for now until we can fix the various backends
582 which depend on having non-shared hard registers in some cases. Long
583 term we want to re-enable this code as it can significantly cut down
584 on the amount of useless RTL that gets generated.
585
586 We'll also need to fix some code that runs after reload that wants to
587 set ORIGINAL_REGNO. */
588
589 if (cfun
590 && cfun->emit
591 && regno_reg_rtx
592 && regno < FIRST_PSEUDO_REGISTER
593 && reg_raw_mode[regno] == mode)
594 return regno_reg_rtx[regno];
595 #endif
596
597 return gen_raw_REG (mode, regno);
598 }
In the function above, frame_pointer_rtx, hard_frame_pointer_rtx, arg_pointer_rtx and stack_pointer_rtx are all members of global_rtx, which are initialized in function init_emit_once. Also in init_emit_once, rtx objects for the registers numbered below the first pseudo register number have already been created, so pay attention to the disabled code and its comment in the function above. The last return statement will create a register rtx object again, even though one already exists, if it is hit.
To understand the registers referred above, we find following related definitions.
1151 /* Register to use for pushing function arguments. */ in i386.h
1152 #define STACK_POINTER_REGNUM 7
1153
1154 /* Base register for access to local variables of the function. */
1155 #define HARD_FRAME_POINTER_REGNUM 6
1156
1157 /* Base register for access to local variables of the function. */
1158 #define FRAME_POINTER_REGNUM 20
1198 /* Base register for access to arguments of the function. */
1199 #define ARG_POINTER_REGNUM 16
1206 /* Register to hold the addressing base for position independent
1207 code access to data items. We don't use PIC pointer for 64bit
1208 mode. Define the regnum to dummy value to prevent gcc from
1209 pessimizing code dealing with EBX.
1210
1211 To avoid clobbering a call-saved register unnecessarily, we renumber
1212 the pic register when possible. The change is visible after the
1213 prologue has been emitted. */
1214
1215 #define REAL_PIC_OFFSET_TABLE_REGNUM 3
1216
1217 #define PIC_OFFSET_TABLE_REGNUM \
1218 (TARGET_64BIT || !flag_pic ? INVALID_REGNUM \
1219 : reload_completed ? REGNO (pic_offset_table_rtx) \
1220 : REAL_PIC_OFFSET_TABLE_REGNUM)
On the x86 machine, STACK_POINTER_REGNUM is sp, ARG_POINTER_REGNUM is arg (pseudo), HARD_FRAME_POINTER_REGNUM is bp, and FRAME_POINTER_REGNUM is frame (pseudo). To hold the addressing base for position-independent code access to data items, the x86 machine uses bx before the reload pass completes.
The rtx object returned by gen_rtx_REG is passed to gen_rtx_ADDRESS at line 2751 in init_alias_once. gen_rtx_ADDRESS is, of course, target machine dependent. For the x86 machine, it is defined as gen_rtx_fmt_e with the code ADDRESS. The ‘e’ in the name means the content of the rtx expression is another expression.
231 rtx
232 gen_rtx_fmt_e (RTX_CODE code, enum machine_mode mode, in genrtl.c
233 rtx arg0)
234 {
235 rtx rt;
236 rt = rtx_alloc (code);
237
238 PUT_MODE (rt, mode);
239 XEXP (rt, 0) = arg0;
240
241 return rt;
242 }
XEXP resembles X0EXP, except for the operand type it expects. For X0EXP, it is ‘0’, which means unused (or used in a phase-dependent manner).
508 #define XEXP(RTX, N) (RTL_CHECK2 (RTX, N, 'e', 'u').rtx) in rtl.h
316 #define RTL_CHECK2(RTX, N, C1, C2) __extension__ \
317 (*({ rtx const _rtx = (RTX); const int _n = (N); \
318 const enum rtx_code _code = GET_CODE (_rtx); \
319 if (_n < 0 || _n >= GET_RTX_LENGTH (_code)) \
320 rtl_check_failed_bounds (_rtx, _n, __FILE__, __LINE__, \
321 __FUNCTION__); \
322 if (GET_RTX_FORMAT(_code)[_n] != C1 \
323 && GET_RTX_FORMAT(_code)[_n] != C2) \
324 rtl_check_failed_type2 (_rtx, _n, C1, C2, __FILE__, __LINE__, \
325 __FUNCTION__); \
326 &_rtx->u.fld[_n]; }))
Initialize Rtx Objects for Aliasing Code