Study notes on the GCC-3.4.6 source (44)

4.2.3.3.    Determine cost of moving data between registers

Next, init_reg_sets_1 evaluates the cost of moving data between registers of different classes, and then between registers and memory.

 

init_reg_sets_1 (continued)

 

477   /* Initialize the move cost table. Find every subset of each class

478      and take the maximum cost of moving any subset to any other.  */

479 

480    for (m = 0; m < (unsigned int) MAX_MACHINE_MODE; m++)

481      if (allocatable_regs_of_mode [m])

482      {

483        for (i = 0; i < N_REG_CLASSES; i++)

484          if (contains_reg_of_mode [i][m])

485            for (j = 0; j < N_REG_CLASSES; j++)

486            {

487              int cost;

488              enum reg_class *p1, *p2;

489 

490              if (!contains_reg_of_mode [j][m])

491              {

492                move_cost[m][i][j] = 65536;

493                may_move_in_cost[m][i][j] = 65536;

494                may_move_out_cost[m][i][j] = 65536;

495              }

496              else

497              {

498                cost = REGISTER_MOVE_COST (m, i, j);

499 

500                for (p2 = &reg_class_subclasses[j][0];

501                    *p2 != LIM_REG_CLASSES;

502                    p2++)

503                  if (*p2 != i && contains_reg_of_mode [*p2][m])

504                    cost = MAX (cost, move_cost [m][i][*p2]);

505 

506                for (p1 = &reg_class_subclasses[i][0];

507                    *p1 != LIM_REG_CLASSES;

508                    p1++)

509                  if (*p1 != j && contains_reg_of_mode [*p1][m])

510                    cost = MAX (cost, move_cost [m][*p1][j]);

511  

512                move_cost[m][i][j] = cost;

513 

514                if (reg_class_subset_p (i, j))

515                  may_move_in_cost[m][i][j] = 0;

516                else

517                  may_move_in_cost[m][i][j] = cost;

518 

519                if (reg_class_subset_p (j, i))

520                  may_move_out_cost[m][i][j] = 0;

521                else

522                  may_move_out_cost[m][i][j] = cost;

523              }

524           }

525          else

526            for (j = 0; j < N_REG_CLASSES; j++)

527            {

528              move_cost[m][i][j] = 65536;

529              may_move_in_cost[m][i][j] = 65536;

530              may_move_out_cost[m][i][j] = 65536;

531            }

532      }

533  }

 

The cost of moving data between registers naturally differs from target to target. Some moves are not possible because the target does not allow them, and some need the value to be saved temporarily in memory. Above, move_cost records the maximum cost of moving from a register in one class to a register in another class. For permitted moves, the macro REGISTER_MOVE_COST evaluates the cost.

 

2645 #define REGISTER_MOVE_COST(MODE, CLASS1, CLASS2) \                    in i386.h

2646    ix86_register_move_cost ((MODE), (CLASS1), (CLASS2))

 

14880 int

14881 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,      in i386.c

14882            enum reg_class class2)

14883 {

14884   /* In case we require secondary memory, compute cost of the store followed

14885     by load. In order to avoid bad register allocation choices, we need

14886     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */

14887

14888   if (ix86_secondary_memory_needed (class1, class2, mode, 0))

14889   {

14890     int cost = 1;

14891

14892     cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),

14893        MEMORY_MOVE_COST (mode, class1, 1));

14894     cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),

14895        MEMORY_MOVE_COST (mode, class2, 1));

14896

14897     /* In case of copying from general_purpose_register we may emit multiple

14898       stores followed by single load causing memory size mismatch stall.

14899       Count this as arbitrarily high cost of 20.  */

14900     if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))

14901       cost += 20;

14902

14903     /* In the case of FP/MMX moves, the registers actually overlap, and we

14904       have to switch modes in order to treat them differently.  */

14905     if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))

14906           || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))

14907       cost += 20;

14908

14909       return cost;

14910   }

14911

14912   /* Moves between SSE/MMX and integer unit are expensive.  */

14913   if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)

14914       || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))

14915     return ix86_cost->mmxsse_to_integer;

14916   if (MAYBE_FLOAT_CLASS_P (class1))

14917     return ix86_cost->fp_move;

14918   if (MAYBE_SSE_CLASS_P (class1))

14919     return ix86_cost->sse_move;

14920   if (MAYBE_MMX_CLASS_P (class1))

14921     return ix86_cost->mmx_move;

14922   return 2;

14923 }

 

Above, at line 14888, ix86_secondary_memory_needed checks whether memory is needed for moving data between the two register classes.

 

14852 int                                                                                                                       in i386.c

14853 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,

14854                  enum machine_mode mode, int strict)

14855 {

14856   if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)

14857       || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)

14858       || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)

14859       || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)

14860       || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)

14861       || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))

14862   {

14863     if (strict)

14864       abort ();

14865     else

14866       return 1;

14867   }

14868   return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)

14869      || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)

14870      || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))

14871          && ((mode != SImode && (mode != DImode || !TARGET_64BIT))

14872             || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));

14873 }

 

Note that the last parameter of the function (strict) is 0 here, so the function returns 1 instead of aborting at line 14864. This is the only call of the function in the code discussed here (elsewhere, the SECONDARY_MEMORY_NEEDED macro in i386.h calls it with strict set to 1). Macro MAYBE_FLOAT_CLASS_P checks whether the given register class intersects with the FLOAT_REGS class, and macro FLOAT_CLASS_P checks whether the given register class is contained in the FLOAT_REGS class. The other macros are similar. The definitions of FLOAT_CLASS_P and MAYBE_FLOAT_CLASS_P are as below:

 

1318 #define MAYBE_FLOAT_CLASS_P(CLASS) \                                        in i386.h

1319   reg_classes_intersect_p ((CLASS), FLOAT_REGS)

 

1310 #define FLOAT_CLASS_P(CLASS) \

1311   reg_class_subset_p ((CLASS), FLOAT_REGS)

 

2545 int

2546 reg_classes_intersect_p (enum reg_class c1, enum reg_class c2)                      in regclass.c

2547 {

2548   HARD_REG_SET c;

2549

2550   if (c1 == c2) return 1;

2551

2552   if (c1 == ALL_REGS || c2 == ALL_REGS)

2553     return 1;

2554

2555   COPY_HARD_REG_SET (c, reg_class_contents[(int) c1]);

2556   AND_HARD_REG_SET (c, reg_class_contents[(int) c2]);

2557

2558   GO_IF_HARD_REG_SUBSET (c, reg_class_contents[(int) NO_REGS], lose);

2559   return 1;

2560

2561 lose:

2562   return 0;

2563 }

 

2529 int

2530 reg_class_subset_p (enum reg_class c1, enum reg_class c2)                           in regclass.c

2531 {

2532   if (c1 == c2) return 1;

2533

2534   if (c2 == ALL_REGS)

2535 win:

2536     return 1;

2537   GO_IF_HARD_REG_SUBSET (reg_class_contents[(int) c1],

2538       reg_class_contents[(int) c2],

2539       win);

2540   return 0;

2541 }

 

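So in ix86_register_move_cost, at line 14888, ix86_secondary_memory_needed returns true in the following cases: either class only partially overlaps the floating-point, SSE or MMX registers (lines 14856-14861; with strict being 0 the function returns 1 rather than aborting); exactly one of the two classes is a floating-point class (line 14868); or the two classes differ in being SSE or MMX classes and the value cannot be moved directly, i.e. the mode is neither SImode nor (on a 64-bit target) DImode, or inter-unit moves are disabled and we are not optimizing for size (lines 14869-14872). In all these cases we need memory for the move between these registers. The system first needs to store the value from register1 into memory, and then load this value from memory into register2; as the encoding of the value differs between the two registers, a direct copy is not appropriate. So we need to count the cost twice, once for moving out of register1 and once for moving into register2. To be conservative, for each class we take the more expensive of its load and its store into account. Now let's see how MEMORY_MOVE_COST works.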

 

2656 #define MEMORY_MOVE_COST(MODE, CLASS, IN)   \                           in i386.h

2657   ix86_memory_move_cost ((MODE), (CLASS), (IN))

 

14984 int                                                                                                          in i386.c

14985 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)

14986 {

14987   if (FLOAT_CLASS_P (class))

14988   {

14989     int index;

14990     switch (mode)

14991     {

14992       case SFmode:

14993         index = 0;

14994         break;

14995       case DFmode:

14996         index = 1;

14997         break;

14998       case XFmode:

14999         index = 2;

15000         break;

15001       default:

15002         return 100;

15003     }

15004     return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];

15005   }

15006   if (SSE_CLASS_P (class))

15007   {

15008     int index;

15009     switch (GET_MODE_SIZE (mode))

15010     {

15011       case 4:

15012         index = 0;

15013         break;

15014       case 8:

15015         index = 1;

15016         break;

15017       case 16:

15018         index = 2;

15019         break;

15020       default:

15021         return 100;

15022     }

15023     return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];

15024   }

15025   if (MMX_CLASS_P (class))

15026   {

15027     int index;

15028     switch (GET_MODE_SIZE (mode))

15029     {

15030       case 4:

15031         index = 0;

15032         break;

15033       case 8:

15034         index = 1;

15035         break;

15036       default:

15037         return 100;

15038     }

15039     return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];

15040   }

15041   switch (GET_MODE_SIZE (mode))

15042   {

15043     case 1:

15044       if (in)

15045         return (Q_CLASS_P (class) ? ix86_cost->int_load[0]

15046              : ix86_cost->movzbl_load);

15047       else

15048         return (Q_CLASS_P (class) ? ix86_cost->int_store[0]

15049              : ix86_cost->int_store[0] + 4);

15050       break;

15051     case 2:

15052       return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];

15053     default:

15054       /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */

15055       if (mode == TFmode)

15056         mode = XFmode;

15057       return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])

15058            * (((int) GET_MODE_SIZE (mode)

15059            + UNITS_PER_WORD - 1) / UNITS_PER_WORD));

15060   }

15061 }

 

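The key data structure here is ix86_cost, which records the costs of operations on a particular processor. For x86 machines, we get definitions like the following (the table shown is pentium4_cost; note that at line 460 ix86_cost initially points to pentium_cost):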

 

416  static const                                                                                                       in i386.c

417  struct processor_costs pentium4_cost = {

418    1,                                   /* cost of an add instruction */

419    1,                                   /* cost of a lea instruction */

420    4,                                   /* variable shift costs */

421    4,                                   /* constant shift costs */

422    {15, 15, 15, 15, 15},        /* cost of starting a multiply */

423    0,                                   /* cost of multiply per each bit set */

424    {56, 56, 56, 56, 56},        /* cost of a divide/mod */

425    1,                                   /* cost of movsx */

426    1,                                   /* cost of movzx */

427    16,                                  /* "large" insn */

428    6,                                   /* MOVE_RATIO */

429    2,                                   /* cost for loading QImode using movzbl */

430    {4, 5, 4},                        /* cost of loading integer registers

431                                    in QImode, HImode and SImode.

432                                    Relative to reg-reg move (2).  */

433    {2, 3, 2},                        /* cost of storing integer registers */

434    2,                                   /* cost of reg,reg fld/fst */

435    {2, 2, 6},                        /* cost of loading fp registers

436                                    in SFmode, DFmode and XFmode */

437    {4, 4, 6},                        /* cost of loading integer registers */

438    2,                                   /* cost of moving MMX register */

439    {2, 2},                            /* cost of loading MMX registers

440                                    in SImode and DImode */

441    {2, 2},                            /* cost of storing MMX registers

442                                    in SImode and DImode */

443    12,                                  /* cost of moving SSE register */

444    {12, 12, 12},                   /* cost of loading SSE registers

445                                    in SImode, DImode and TImode */

446    {2, 2, 8},                        /* cost of storing SSE registers

447                                    in SImode, DImode and TImode */

448    10,                                  /* MMX or SSE register to integer */

449    64,                                  /* size of prefetch block */

450    6,                                   /* number of parallel prefetches */

451    2,                                   /* Branch cost */

452    5,                                   /* cost of FADD and FSUB insns.  */

453    7,                                   /* cost of FMUL instruction.  */

454    43,                                  /* cost of FDIV instruction.  */

455    2,                                   /* cost of FABS instruction.  */

456    2,                                   /* cost of FCHS instruction.  */

457    43,                                  /* cost of FSQRT instruction.  */

458  };

459 

460  const struct processor_costs *ix86_cost = &pentium_cost;

 

And the definition of processor_costs is:

 

39    struct processor_costs {                                                                              in i386.h

40      const int add;                   /* cost of an add instruction */

41      const int lea;                    /* cost of a lea instruction */

42      const int shift_var;            /* variable shift costs */

43      const int shift_const;  /* constant shift costs */

44      const int mult_init[5];       /* cost of starting a multiply

45                                   in QImode, HImode, SImode, DImode, TImode*/

46      const int mult_bit;            /* cost of multiply per each bit set */

47      const int divide[5];           /* cost of a divide/mod

48                                   in QImode, HImode, SImode, DImode, TImode*/

49      int movsx;                       /* The cost of movsx operation.  */

50      int movzx;                /* The cost of movzx operation.  */

51      const int large_insn;          /* insns larger than this cost more */

52      const int move_ratio; /* The threshold of number of scalar

53                                   memory-to-memory move insns.  */

54      const int movzbl_load;      /* cost of loading using movzbl */

55      const int int_load[3]; /* cost of loading integer registers

56                                   in QImode, HImode and SImode relative

57                                   to reg-reg move (2).  */

58      const int int_store[3]; /* cost of storing integer register

59                                   in QImode, HImode and SImode */

60      const int fp_move;            /* cost of reg,reg fld/fst */

61      const int fp_load[3];         /* cost of loading FP register

62                                   in SFmode, DFmode and XFmode */

63      const int fp_store[3]; /* cost of storing FP register

64                                   in SFmode, DFmode and XFmode */

65      const int mmx_move;       /* cost of moving MMX register.  */

66      const int mmx_load[2];     /* cost of loading MMX register

67                                   in SImode and DImode */

68      const int mmx_store[2];    /* cost of storing MMX register

69                                   in SImode and DImode */

70      const int sse_move;           /* cost of moving SSE register.  */

71      const int sse_load[3]; /* cost of loading SSE register

72                                   in SImode, DImode and TImode*/

73      const int sse_store[3];       /* cost of storing SSE register

74                                   in SImode, DImode and TImode*/

75      const int mmxsse_to_integer;    /* cost of moving mmxsse register to

76                                       integer and vice versa.  */

77      const int prefetch_block;   /* bytes moved to cache for prefetch.  */

78      const int simultaneous_prefetches; /* number of parallel prefetch

79                                           operations.  */

80      const int branch_cost;       /* Default value for BRANCH_COST.  */

81      const int fadd;                  /* cost of FADD and FSUB instructions.  */

82      const int fmul;                 /* cost of FMUL instruction.  */

83      const int fdiv;                  /* cost of FDIV instruction.  */

84      const int fabs;                  /* cost of FABS instruction.  */

85      const int fchs;                  /* cost of FCHS instruction.  */

86      const int fsqrt;                  /* cost of FSQRT instruction.  */

87    };

 

From this definition, we can see that most of the data must be defined and provided beforehand. To write a compiler, we must know the CPU well.

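Then, at lines 14892 and 14894 in ix86_register_move_cost, for each of the two classes it takes the larger of the store cost and the load cost and adds it to cost (which starts at 1). The resulting value is returned and, back in init_reg_sets_1, eventually saved into move_cost.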

At line 14900, macro CLASS_MAX_NREGS finds out the maximum number of consecutive registers of CLASS needed to represent MODE; if class1 needs more registers than class2, an extra cost of 20 is added. From line 14913 on, for the case where no secondary memory is needed, the cost can be read from ix86_cost. Moving data between registers of integer classes costs 2, the baseline relative to which the load and store costs are expressed. Notice that if ix86_secondary_memory_needed returns false at line 14888 and the condition at line 14913 is satisfied, the move must be between an MMX or SSE register and an integer register holding an integer value (SImode, or DImode on a 64-bit target), which is the case that the expression at lines 14868-14872 in ix86_secondary_memory_needed lets through.

After finding the move cost between two register classes with REGISTER_MOVE_COST, init_reg_sets_1 selects the maximum cost over the subclasses of the classes involved as the move cost for the class pair.

 

你可能感兴趣的:(Integer,Class,float,branch,parallel,Allocation)