GCC-3.4.6源代码学习笔记(44)

4.2.3.3.    确定寄存器间移动数据的代价

接下来,init_reg_sets_1评估在不同类别寄存器间移动数据的代价,而后是评估在寄存器和内存之间移动数据的代价。

 

init_reg_sets_1 (continue)

 

477   /* Initialize the move cost table. Find every subset of each class

478      and take the maximum cost of moving any subset to any other.  */

479 

480    for (m = 0; m < (unsigned int) MAX_MACHINE_MODE; m++)

481      if (allocatable_regs_of_mode [m])

482      {

483        for (i = 0; i < N_REG_CLASSES; i++)

484          if (contains_reg_of_mode [i][m])

485            for (j = 0; j < N_REG_CLASSES; j++)

486            {

487              int cost;

488              enum reg_class *p1, *p2;

489 

490              if (!contains_reg_of_mode [j][m])

491              {

492                move_cost[m][i][j] = 65536;

493                may_move_in_cost[m][i][j] = 65536;

494                may_move_out_cost[m][i][j] = 65536;

495              }

496              else

497              {

498                cost = REGISTER_MOVE_COST (m, i, j);

499 

500                for (p2 = &reg_class_subclasses[j][0];

501                    *p2 != LIM_REG_CLASSES;

502                    p2++)

503                  if (*p2 != i && contains_reg_of_mode [*p2][m])

504                    cost = MAX (cost, move_cost [m][i][*p2]);

505 

506                for (p1 = &reg_class_subclasses[i][0];

507                    *p1 != LIM_REG_CLASSES;

508                    p1++)

509                  if (*p1 != j && contains_reg_of_mode [*p1][m])

510                    cost = MAX (cost, move_cost [m][*p1][j]);

511  

512                move_cost[m][i][j] = cost;

513 

514                if (reg_class_subset_p (i, j))

515                  may_move_in_cost[m][i][j] = 0;

516                else

517                  may_move_in_cost[m][i][j] = cost;

518 

519                if (reg_class_subset_p (j, i))

520                  may_move_out_cost[m][i][j] = 0;

521                else

522                  may_move_out_cost[m][i][j] = cost;

523              }

524           }

525          else

526            for (j = 0; j < N_REG_CLASSES; j++)

527            {

528              move_cost[m][i][j] = 65536;

529              may_move_in_cost[m][i][j] = 65536;

530              may_move_out_cost[m][i][j] = 65536;

531            }

532      }

533  }

 

毫无疑问,在寄存器间移动数据,根据目标寄存器的不同,会有不同的代价。有些移动是不可能的,因为目标寄存器不允许,而有些移动需要临时的内存。上面,move_cost用于记录从一个类别的寄存器移动数据到另一个类别寄存器的最大代价。对于允许的移动,宏REGISTER_MOVE_COST评估其代价。

 

2645 #define REGISTER_MOVE_COST(MODE, CLASS1, CLASS2) \                    in i386.h

2646    ix86_register_move_cost ((MODE), (CLASS1), (CLASS2))

 

14880 int

14881 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,      in i386.c

14882            enum reg_class class2)

14883 {

14884   /* In case we require secondary memory, compute cost of the store followed

14885     by load. In order to avoid bad register allocation choices, we need

14886     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */

14887

14888   if (ix86_secondary_memory_needed (class1, class2, mode, 0))

14889   {

14890     int cost = 1;

14891

14892     cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),

14893        MEMORY_MOVE_COST (mode, class1, 1));

14894     cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),

14895        MEMORY_MOVE_COST (mode, class2, 1));

14896

14897     /* In case of copying from general_purpose_register we may emit multiple

14898       stores followed by single load causing memory size mismatch stall.

14899       Count this as arbitrarily high cost of 20.  */

14900     if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))

14901       cost += 20;

14902

14903     /* In the case of FP/MMX moves, the registers actually overlap, and we

14904       have to switch modes in order to treat them differently.  */

14905     if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))

14906           || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))

14907       cost += 20;

14908

14909       return cost;

14910   }

14911

14912   /* Moves between SSE/MMX and integer unit are expensive.  */

14913   if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)

14914       || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))

14915     return ix86_cost->mmxsse_to_integer;

14916   if (MAYBE_FLOAT_CLASS_P (class1))

14917     return ix86_cost->fp_move;

14918   if (MAYBE_SSE_CLASS_P (class1))

14919     return ix86_cost->sse_move;

14920   if (MAYBE_MMX_CLASS_P (class1))

14921     return ix86_cost->mmx_move;

14922   return 2;

14923 }

 

上面,在14888行,ix86_secondary_memory_needed检查在这2个寄存器类别之间移动数据是否需要临时内存。

 

14852 int                                                                                                                       in i386.c

14853 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,

14854                  enum machine_mode mode, int strict)

14855 {

14856   if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)

14857       || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)

14858       || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)

14859       || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)

14860       || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)

14861       || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))

14862   {

14863     if (strict)

14864       abort ();

14865     else

14866       return 1;

14867   }

14868   return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)

14869      || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)

14870      || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))

14871           && ((mode != SImode && (mode != DImode || !TARGET_64BIT))

14872             || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));

14873 }

 

注意到函数最后的参数在这里是0。而且这是该函数唯一被调用的地方。宏MAYBE_FLOAT_CLASS_P检查寄存器的类别是否与FLOAT_REGS类别有重合。而宏FLOAT_CLASS_P检查寄存器的类别是否被FLOAT_REGS类别所包含。其他宏也是类似的。FLOAT_CLASS_P和MAYBE_FLOAT_CLASS_P的定义如下:

 

1318 #define MAYBE_FLOAT_CLASS_P(CLASS) \                                        in i386.h

1319   reg_classes_intersect_p ((CLASS), FLOAT_REGS)

 

1310 #define FLOAT_CLASS_P(CLASS) \

1311   reg_class_subset_p ((CLASS), FLOAT_REGS)

 

2545 int

2546 reg_classes_intersect_p (enum reg_class c1, enum reg_class c2)                      in regclass.c

2547 {

2548   HARD_REG_SET c;

2549

2550   if (c1 == c2) return 1;

2551

2552   if (c1 == ALL_REGS || c2 == ALL_REGS)

2553     return 1;

2554

2555   COPY_HARD_REG_SET (c, reg_class_contents[(int) c1]);

2556   AND_HARD_REG_SET (c, reg_class_contents[(int) c2]);

2557

2558   GO_IF_HARD_REG_SUBSET (c, reg_class_contents[(int) NO_REGS], lose);

2559   return 1;

2560

2561 lose:

2562   return 0;

2563 }

 

2529 int

2530 reg_class_subset_p (enum reg_class c1, enum reg_class c2)                           in regclass.c

2531 {

2532   if (c1 == c2) return 1;

2533

2534   if (c2 == ALL_REGS)

2535 win:

2536     return 1;

2537   GO_IF_HARD_REG_SUBSET (reg_class_contents[(int) c1],

2538       reg_class_contents[(int) c2],

2539       win);

2540   return 0;

2541 }

 

那么在ix86_register_move_cost的14888行,如果class1或class2只是与FLOAT_REGS、SSE_REGS或MMX_REGS部分重合(即有交集,但不被其完全包含);或者如果class1和class2不属于同一个类别,ix86_secondary_memory_needed将返回true,即在这些寄存器间移动数据需要临时内存。系统首先需要把寄存器1的内容放到内存,然后从内存读入到寄存器2中,因为在这2个寄存器中对这个值的编码是不同的,直接的拷贝是不合适的。因此需要2次计入代价,一次从寄存器1中移出,另一次移入寄存器2。谨慎起见,只考虑每个类别中最大的代价。

 

2656 #define MEMORY_MOVE_COST(MODE, CLASS, IN)   \                           in i386.h

2657   ix86_memory_move_cost ((MODE), (CLASS), (IN))

 

14984 int                                                                                                          in i386.c

14985 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)

14986 {

14987   if (FLOAT_CLASS_P (class))

14988   {

14989     int index;

14990     switch (mode)

14991     {

14992       case SFmode:

14993         index = 0;

14994         break;

14995       case DFmode:

14996         index = 1;

14997         break;

14998       case XFmode:

14999         index = 2;

15000         break;

15001       default:

15002         return 100;

15003     }

15004     return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];

15005   }

15006   if (SSE_CLASS_P (class))

15007   {

15008     int index;

15009     switch (GET_MODE_SIZE (mode))

15010     {

15011       case 4:

15012         index = 0;

15013         break;

15014       case 8:

15015         index = 1;

15016         break;

15017       case 16:

15018         index = 2;

15019         break;

15020       default:

15021         return 100;

15022     }

15023     return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];

15024   }

15025   if (MMX_CLASS_P (class))

15026   {

15027     int index;

15028     switch (GET_MODE_SIZE (mode))

15029     {

15030       case 4:

15031         index = 0;

15032         break;

15033       case 8:

15034         index = 1;

15035         break;

15036       default:

15037         return 100;

15038     }

15039     return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];

15040   }

15041   switch (GET_MODE_SIZE (mode))

15042   {

15043     case 1:

15044       if (in)

15045         return (Q_CLASS_P (class) ? ix86_cost->int_load[0]

15046              : ix86_cost->movzbl_load);

15047       else

15048         return (Q_CLASS_P (class) ? ix86_cost->int_store[0]

15049              : ix86_cost->int_store[0] + 4);

15050       break;

15051     case 2:

15052       return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];

15053     default:

15054       /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */

15055       if (mode == TFmode)

15056         mode = XFmode;

15057       return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])

15058            * (((int) GET_MODE_SIZE (mode)

15059            + UNITS_PER_WORD - 1) / UNITS_PER_WORD));

15060   }

15061 }

 

这里关键的数据结构是ix86_cost,它记录了特定处理器相关的代价数据。对于x86机器,有如下的定义:

 

416  static const                                                                                                       in i386.c

417  struct processor_costs pentium4_cost = {

418    1,                                   /* cost of an add instruction */

419    1,                                   /* cost of a lea instruction */

420    4,                                   /* variable shift costs */

421    4,                                   /* constant shift costs */

422    {15, 15, 15, 15, 15},        /* cost of starting a multiply */

423    0,                                   /* cost of multiply per each bit set */

424    {56, 56, 56, 56, 56},        /* cost of a divide/mod */

425    1,                                   /* cost of movsx */

426    1,                                   /* cost of movzx */

427    16,                                  /* "large" insn */

428    6,                                   /* MOVE_RATIO */

429    2,                                   /* cost for loading QImode using movzbl */

430    {4, 5, 4},                        /* cost of loading integer registers

431                                    in QImode, HImode and SImode.

432                                    Relative to reg-reg move (2).  */

433    {2, 3, 2},                        /* cost of storing integer registers */

434    2,                                   /* cost of reg,reg fld/fst */

435    {2, 2, 6},                        /* cost of loading fp registers

436                                    in SFmode, DFmode and XFmode */

437    {4, 4, 6},                        /* cost of loading integer registers */

438    2,                                   /* cost of moving MMX register */

439    {2, 2},                            /* cost of loading MMX registers

440                                    in SImode and DImode */

441    {2, 2},                            /* cost of storing MMX registers

442                                    in SImode and DImode */

443    12,                                  /* cost of moving SSE register */

444    {12, 12, 12},                   /* cost of loading SSE registers

445                                    in SImode, DImode and TImode */

446    {2, 2, 8},                        /* cost of storing SSE registers

447                                    in SImode, DImode and TImode */

448    10,                                  /* MMX or SSE register to integer */

449    64,                                  /* size of prefetch block */

450    6,                                   /* number of parallel prefetches */

451    2,                                   /* Branch cost */

452    5,                                   /* cost of FADD and FSUB insns.  */

453    7,                                   /* cost of FMUL instruction.  */

454    43,                                  /* cost of FDIV instruction.  */

455    2,                                   /* cost of FABS instruction.  */

456    2,                                   /* cost of FCHS instruction.  */

457    43,                                  /* cost of FSQRT instruction.  */

458  };

459 

460  const struct processor_costs *ix86_cost = &pentium_cost;

 

processor_costs的定义为:

 

39    struct processor_costs {                                                                              in i386.h

40      const int add;                   /* cost of an add instruction */

41      const int lea;                    /* cost of a lea instruction */

42      const int shift_var;            /* variable shift costs */

43      const int shift_const;  /* constant shift costs */

44      const int mult_init[5];       /* cost of starting a multiply

45                                   in QImode, HImode, SImode, DImode, TImode*/

46      const int mult_bit;            /* cost of multiply per each bit set */

47      const int divide[5];           /* cost of a divide/mod

48                                   in QImode, HImode, SImode, DImode, TImode*/

49      int movsx;                       /* The cost of movsx operation.  */

50      int movzx;                /* The cost of movzx operation.  */

51      const int large_insn;          /* insns larger than this cost more */

52      const int move_ratio; /* The threshold of number of scalar

53                                   memory-to-memory move insns.  */

54      const int movzbl_load;      /* cost of loading using movzbl */

55      const int int_load[3]; /* cost of loading integer registers

56                                   in QImode, HImode and SImode relative

57                                   to reg-reg move (2).  */

58      const int int_store[3]; /* cost of storing integer register

59                                   in QImode, HImode and SImode */

60      const int fp_move;            /* cost of reg,reg fld/fst */

61      const int fp_load[3];         /* cost of loading FP register

62                                   in SFmode, DFmode and XFmode */

63      const int fp_store[3]; /* cost of storing FP register

64                                   in SFmode, DFmode and XFmode */

65      const int mmx_move;       /* cost of moving MMX register.  */

66      const int mmx_load[2];     /* cost of loading MMX register

67                                   in SImode and DImode */

68      const int mmx_store[2];    /* cost of storing MMX register

69                                   in SImode and DImode */

70      const int sse_move;           /* cost of moving SSE register.  */

71      const int sse_load[3]; /* cost of loading SSE register

72                                   in SImode, DImode and TImode*/

73      const int sse_store[3];       /* cost of storing SSE register

74                                   in SImode, DImode and TImode*/

75      const int mmxsse_to_integer;    /* cost of moving mmxsse register to

76                                       integer and vice versa.  */

77      const int prefetch_block;   /* bytes moved to cache for prefetch.  */

78      const int simultaneous_prefetches; /* number of parallel prefetch

79                                           operations.  */

80      const int branch_cost;       /* Default value for BRANCH_COST.  */

81      const int fadd;                  /* cost of FADD and FSUB instructions.  */

82      const int fmul;                 /* cost of FMUL instruction.  */

83      const int fdiv;                  /* cost of FDIV instruction.  */

84      const int fabs;                  /* cost of FABS instruction.  */

85      const int fchs;                  /* cost of FCHS instruction.  */

86      const int fsqrt;                  /* cost of FSQRT instruction.  */

87    };

 

从这个定义,可以知道大多数数据需要预先定义,预先提供。要做一个编译器,需要很好地了解CPU。

在ix86_register_move_cost的14892和14894行,对每个类别,把读、写内存2个方向上代价较高者计入cost,作为所寻求的代价(该值最终被存入move_cost)。

在14900行,宏CLASS_MAX_NREGS找出类别为CLASS的寄存器,用于模式为MODE的数据时,所需要的最大连续寄存器的数目。从14913行开始,对于不需要临时内存的情况,代价数据可以从ix86_cost得到。在具有整数类别的寄存器间移动数据的代价最小——为2。注意到如果在14888行,ix86_secondary_memory_needed返回false,而且满足14913行的条件,应该使用寄存器MMX或SSE来存放整数(它满足ix86_secondary_memory_needed 14868行的条件)。

在通过REGISTER_MOVE_COST找出寄存器类别间的数据移动的代价后,把涉及类别中,具有最大代价的子类别的代价数据作为该类别的代价。

 

你可能感兴趣的:(Integer,Class,float,branch,parallel,Allocation)