Studying note of GCC-3.4.6 source (39)

4.1.4. Adjust options according to target

After returning from c_common_post_options, we continue with process_options. Remember that input_filename accesses the file field of input_location, which tracks the file currently being compiled.

 

process_options (continue)

 

4283   input_filename = main_input_filename;

4284

4285 #ifdef OVERRIDE_OPTIONS

4286   /* Some machines may reject certain combinations of options.  */

4287   OVERRIDE_OPTIONS;

4288 #endif

 

If the back-end has special requirements on the target-related options, it needs to define the macro OVERRIDE_OPTIONS, used above at line 4285, to provide the handler. For the x86 target, the macro is defined as the function below.

 

1050 void

1051 override_options (void)                                                                                    in i386.c

1052 {

1053   int i;

1054   /* Comes from final.c -- no real reason to change it.  */

1055 #define MAX_CODE_ALIGN 16

1056

1057   static struct ptt

1058   {

1059     const struct processor_costs *cost;      /* Processor costs */

1060     const int target_enable;               /* Target flags to enable.  */

1061     const int target_disable;                     /* Target flags to disable.  */

1062     const int align_loop;                  /* Default alignments.  */

1063     const int align_loop_max_skip;

1064     const int align_jump;

1065     const int align_jump_max_skip;

1066     const int align_func;

1067   }

1068   const processor_target_table[PROCESSOR_max] =

1069   {

1070     {&i386_cost, 0, 0, 4, 3, 4, 3, 4},

1071     {&i486_cost, 0, 0, 16, 15, 16, 15, 16},

1072     {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},

1073     {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},

1074     {&k6_cost, 0, 0, 32, 7, 32, 7, 32},

1075     {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},

1076     {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},

1077     {&k8_cost, 0, 0, 16, 7, 16, 7, 16}

1078   };

1079

1080   static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;

1081   static struct pta

1082   {

1083     const char *const name;              /* processor name or nickname.  */

1084     const enum processor_type processor;

1085     const enum pta_flags

1086     {

1087       PTA_SSE = 1,

1088       PTA_SSE2 = 2,

1089       PTA_SSE3 = 4,

1090       PTA_MMX = 8,

1091       PTA_PREFETCH_SSE = 16,

1092       PTA_3DNOW = 32,

1093       PTA_3DNOW_A = 64,

1094       PTA_64BIT = 128

1095     } flags;

1096   }

1097   const processor_alias_table[] =

1098   {

1099     {"i386", PROCESSOR_I386, 0},

1100     {"i486", PROCESSOR_I486, 0},

1101     {"i586", PROCESSOR_PENTIUM, 0},

1102     {"pentium", PROCESSOR_PENTIUM, 0},

1103     {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},

1104     {"winchip-c6", PROCESSOR_I486, PTA_MMX},

1105     {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},

1106     {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},

1107     {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},

1108     {"i686", PROCESSOR_PENTIUMPRO, 0},

1109     {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},

1110     {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},

1111     {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},

1112     {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},

1113     {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},

1114     {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2

1115                             | PTA_MMX | PTA_PREFETCH_SSE},

1116     {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2

1117                              | PTA_MMX | PTA_PREFETCH_SSE},

1118     {"prescott", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3

1119                              | PTA_MMX | PTA_PREFETCH_SSE},

1120     {"nocona", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT

1121                          | PTA_MMX | PTA_PREFETCH_SSE},

1122     {"k6", PROCESSOR_K6, PTA_MMX},

1123     {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},

1124     {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},

1125     {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW

1126                        | PTA_3DNOW_A},

1127     {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE

1128                            | PTA_3DNOW | PTA_3DNOW_A},

1129     {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW

1130                         | PTA_3DNOW_A | PTA_SSE},

1131       {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW

1132                                  | PTA_3DNOW_A | PTA_SSE},

1133       {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW

1134                                  | PTA_3DNOW_A | PTA_SSE},

1135       {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT

1136                            | PTA_SSE | PTA_SSE2 },

1137       {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT

1138                                  | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},

1139       {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT

1140                                  | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},

1141       {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT

1142                                  | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},

1143       {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT

1144                                  | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},

1145     };

1146  

1147     int const pta_size = ARRAY_SIZE (processor_alias_table);

 

Above, processor_target_table and processor_alias_table have their types declared right where they are defined, so these types can’t be used elsewhere. TARGET_CPU_DEFAULT_NAMES at line 1080 defines the names of the CPUs in the family.

 

710    #define TARGET_CPU_DEFAULT_NAMES {"i386", "i486", "pentium", "pentium-mmx",\

711                                "pentiumpro", "pentium2", "pentium3", \

712                                "pentium4", "k6", "k6-2", "k6-3",\

713                                "athlon", "athlon-4", "k8", \

714                                "pentium-m", "prescott", "nocona"}

 

At line 1085, pta_flags describes the attributes of a chip, i.e. which instruction/register set extensions (MMX, SSE, 3DNow!, 64-bit, ...) are available on it.

 

override_options (continue)

 

1149   /* Set the default values for switches whose default depends on TARGET_64BIT

1150     in case they weren't overwritten by command line options.  */

1151   if (TARGET_64BIT)

1152   {

1153     if (flag_omit_frame_pointer == 2)

1154       flag_omit_frame_pointer = 1;

1155     if (flag_asynchronous_unwind_tables == 2)

1156       flag_asynchronous_unwind_tables = 1;

1157     if (flag_pcc_struct_return == 2)

1158       flag_pcc_struct_return = 0;

1159   }

1160   else

1161   {

1162     if (flag_omit_frame_pointer == 2)

1163       flag_omit_frame_pointer = 0;

1164     if (flag_asynchronous_unwind_tables == 2)

1165       flag_asynchronous_unwind_tables = 0;

1166     if (flag_pcc_struct_return == 2)

1167       flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;

1168   }

1169

1170 #ifdef SUBTARGET_OVERRIDE_OPTIONS

1171   SUBTARGET_OVERRIDE_OPTIONS;

1172 #endif

1173

1174   if (!ix86_tune_string && ix86_arch_string)

1175     ix86_tune_string = ix86_arch_string;

1176   if (!ix86_tune_string)

1177     ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];

1178   if (!ix86_arch_string)

1179     ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";

1180

1181   if (ix86_cmodel_string != 0)

1182   {

1183     if (!strcmp (ix86_cmodel_string, "small"))

1184       ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;

1185     else if (flag_pic)

1186       sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);

1187     else if (!strcmp (ix86_cmodel_string, "32"))

1188       ix86_cmodel = CM_32;

1189     else if (!strcmp (ix86_cmodel_string, "kernel") && ! flag_pic)

1190       ix86_cmodel = CM_KERNEL;

1191     else if (!strcmp (ix86_cmodel_string, "medium") && ! flag_pic)

1192       ix86_cmodel = CM_MEDIUM;

1193     else if (!strcmp (ix86_cmodel_string, "large") && ! flag_pic)

1194       ix86_cmodel = CM_LARGE;

1195     else

1196       error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);

1197   }

1198   else

1199   {

1200     ix86_cmodel = CM_32;

1201     if (TARGET_64BIT)

1202       ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;

1203   }

1204   if (ix86_asm_string != 0)

1205   {

1206     if (!strcmp (ix86_asm_string, "intel"))

1207       ix86_asm_dialect = ASM_INTEL;

1208     else if (!strcmp (ix86_asm_string, "att"))

1209       ix86_asm_dialect = ASM_ATT;

1210     else

1211       error ("bad value (%s) for -masm= switch", ix86_asm_string);

1212   }

1213   if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))

1214     error ("code model `%s' not supported in the %s bit mode",

1215      ix86_cmodel_string, TARGET_64BIT ? "64" : "32");

1216   if (ix86_cmodel == CM_LARGE)

1217     sorry ("code model `large' not supported yet");

1218   if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))

1219     sorry ("%i-bit mode not compiled in",

1220      (target_flags & MASK_64BIT) ? 64 : 32);

 

Above, at line 1167, DEFAULT_PCC_STRUCT_RETURN is defined as 1 for x86 chips. The default chip is selected by TARGET_CPU_DEFAULT at line 1177, which is 0 for 32-bit chips (so it selects the first and most common name, “i386”, from cpu_names). SUBTARGET_OVERRIDE_OPTIONS at line 1170 is undefined for x86 chips.

As we have seen, the ix86_*_string variables are assigned in set_target_switch. The details of the corresponding options are given by [6] as below (restricted to the x86 architecture).

-mtune=cpu-type

Tune to cpu-type everything applicable about the generated code, except for the ABI and the set of available instructions. The choices for cpu-type are:

generic Produce code optimized for the most common IA32/AMD64/EM64T processors. If you know the CPU on which your code will run, then you should use the corresponding ‘-mtune’ option instead of ‘-mtune=generic’. But, if you do not know exactly what CPU users of your application will have, then you should use this option.

As new processors are deployed in the marketplace, the behavior of this option will change. Therefore, if you upgrade to a newer version of GCC, the code generated option will change to reflect the processors that were most common when that version of GCC was released.

There is no ‘-march=generic’ option because ‘-march’ indicates the instruction set the compiler can use, and there is no generic instruction set applicable to all processors. In contrast, ‘-mtune’ indicates the processor (or, in this case, collection of processors) for which the code is optimized.

native  This selects the CPU to tune for at compilation time by determining the processor type of the compiling machine. Using ‘-mtune=native’ will produce code optimized for the local machine under the constraints of the selected instruction set. Using ‘-march=native’ will enable all instruction subsets supported by the local machine (hence the result might not run on different machines).

i386   Original Intel’s i386 CPU.

i486   Intel’s i486 CPU. (No scheduling is implemented for this chip.)

i586, pentium

Intel Pentium CPU with no MMX support.

pentium-mmx

Intel PentiumMMX CPU based on Pentium core with MMX instruction set support.

pentiumpro

Intel PentiumPro CPU.

i686   Same as generic, but when used as march option, PentiumPro instruction set will be used, so the code will run on all i686 family chips.

pentium2 Intel Pentium2 CPU based on PentiumPro core with MMX instruction set support.

pentium3, pentium3m

Intel Pentium3 CPU based on PentiumPro core with MMX and SSE instruction set support.

pentium-m

Low power version of Intel Pentium3 CPU with MMX, SSE and SSE2 instruction set support.

pentium4, pentium4m

Intel Pentium4 CPU with MMX, SSE and SSE2 instruction set support.

prescott Improved version of Intel Pentium4 CPU with MMX, SSE, SSE2 and SSE3 instruction set support.

nocona Improved version of Intel Pentium4 CPU with 64-bit extensions, MMX, SSE, SSE2 and SSE3 instruction set support.

core2  Intel Core2 CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3 and SSSE3 instruction set support.

k6     AMD K6 CPU with MMX instruction set support.

k6-2, k6-3 Improved versions of AMD K6 CPU with MMX and 3dNOW! instruction set support.

athlon, athlon-tbird

AMD Athlon CPU with MMX, 3dNOW!, enhanced 3dNOW! and SSE prefetch instructions support.

athlon-4, athlon-xp, athlon-mp

Improved AMD Athlon CPU with MMX, 3dNOW!, enhanced 3dNOW! and full SSE instruction set support.

k8, opteron, athlon64, athlon-fx

AMD K8 core based CPUs with x86-64 instruction set support. (This supersets MMX, SSE, SSE2, 3dNOW!, enhanced 3dNOW! and 64-bit instruction set extensions.)

k8-sse3, opteron-sse3, athlon64-sse3

Improved versions of k8, opteron and athlon64 with SSE3 instruction set support.

amdfam10, barcelona

AMD Family 10h core based CPUs with x86-64 instruction set support. (This supersets MMX, SSE, SSE2, SSE3, SSE4A, 3dNOW!, enhanced 3dNOW!, ABM and 64-bit instruction set extensions.)

winchip-c6

IDT Winchip C6 CPU, dealt in same way as i486 with additional MMX instruction set support.

winchip2 IDT Winchip2 CPU, dealt in same way as i486 with additional MMX and 3dNOW! instruction set support.

c3     Via C3 CPU with MMX and 3dNOW! instruction set support. (No scheduling is implemented for this chip.)

c3-2   Via C3-2 CPU with MMX and SSE instruction set support. (No scheduling is implemented for this chip.)

geode  Embedded AMD CPU with MMX and 3dNOW! instruction set support.

 

While picking a specific cpu-type will schedule things appropriately for that particular chip, the compiler will not generate any code that does not run on the i386 without the ‘-march=cpu-type’ option being used.

 

-march=cpu-type

Generate instructions for the machine type cpu-type. The choices for cpu-type are the same as for ‘-mtune’. Moreover, specifying ‘-march=cpu-type’ implies ‘-mtune=cpu-type’.

 

-mcpu=cpu-type

A deprecated synonym for ‘-mtune’.

 

-masm=dialect

Output asm instructions using selected dialect. Supported choices are ‘intel’ or ‘att’ (the default one). Darwin does not support ‘intel’.

 

Below ‘-m’ switches are supported in addition to the above on AMD x86-64 processors in 64-bit environments.

-m32 -m64

Generate code for a 32-bit or 64-bit environment. The 32-bit environment sets int, long and pointer to 32 bits and generates code that runs on any i386 system. The 64-bit environment sets int to 32 bits and long and pointer to 64 bits and generates code for AMD’s x86-64 architecture. For darwin only the -m64 option turns off the ‘-fno-pic’ and ‘-mdynamic-no-pic’ options.

-mno-red-zone

Do not use a so called red zone for x86-64 code. The red zone is mandated by the x86-64 ABI, it is a 128-byte area beyond the location of the stack pointer that will not be modified by signal or interrupt handlers and therefore can be used for temporary data without adjusting the stack pointer. The flag ‘-mno-red-zone’ disables this red zone.

-mcmodel=small

Generate code for the small code model: the program and its symbols must be linked in the lower 2 GB of the address space. Pointers are 64 bits. Programs can be statically or dynamically linked. This is the default code model.

-mcmodel=kernel

Generate code for the kernel code model. The kernel runs in the negative 2 GB of the address space. This model has to be used for Linux kernel code.

-mcmodel=medium

Generate code for the medium model: The program is linked in the lower 2 GB of the address space but symbols can be located anywhere in the address space. Programs can be statically or dynamically linked, but building of shared libraries is not supported with the medium model.

-mcmodel=large

Generate code for the large model: This model makes no assumptions about addresses and sizes of sections.

The paragraphs above describe well the meaning of the cmodel enumeration below.

 

108    enum cmodel {                                                                                        in i386.h

109      CM_32,    /* The traditional 32-bit ABI.  */

110       CM_SMALL,   /* Assumes all code and data fits in the low 31 bits.  */

111       CM_KERNEL,       /* Assumes all code and data fits in the high 31 bits.  */

112       CM_MEDIUM,       /* Assumes code fits in the low 31 bits; data unlimited.  */

113       CM_LARGE,   /* No assumptions.  */

114       CM_SMALL_PIC   /* Assumes code+data+got/plt fits in a 31 bit region.  */

115     };

 

override_options (continue)

 

1222   for (i = 0; i < pta_size; i++)

1223     if (! strcmp (ix86_arch_string, processor_alias_table[i].name))

1224     {

1225       ix86_arch = processor_alias_table[i].processor;

1226       /* Default cpu tuning to the architecture.  */

1227       ix86_tune = ix86_arch;

1228       if (processor_alias_table[i].flags & PTA_MMX

1229          && !(target_flags_explicit & MASK_MMX))

1230         target_flags |= MASK_MMX;

1231       if (processor_alias_table[i].flags & PTA_3DNOW

1232          && !(target_flags_explicit & MASK_3DNOW))

1233         target_flags |= MASK_3DNOW;

1234       if (processor_alias_table[i].flags & PTA_3DNOW_A

1235          && !(target_flags_explicit & MASK_3DNOW_A))

1236         target_flags |= MASK_3DNOW_A;

1237       if (processor_alias_table[i].flags & PTA_SSE

1238          && !(target_flags_explicit & MASK_SSE))

1239         target_flags |= MASK_SSE;

1240       if (processor_alias_table[i].flags & PTA_SSE2

1241          && !(target_flags_explicit & MASK_SSE2))

1242         target_flags |= MASK_SSE2;

1243       if (processor_alias_table[i].flags & PTA_SSE3

1244          && !(target_flags_explicit & MASK_SSE3))

1245         target_flags |= MASK_SSE3;

1246       if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)

1247         x86_prefetch_sse = true;

1248       if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))

1249         error ("CPU you selected does not support x86-64 instruction set");

1250       break;

1251     }

1252

1253   if (i == pta_size)

1254     error ("bad value (%s) for -march= switch", ix86_arch_string);

1255

1256   for (i = 0; i < pta_size; i++)

1257     if (! strcmp (ix86_tune_string, processor_alias_table[i].name))

1258     {

1259       ix86_tune = processor_alias_table[i].processor;

1260       if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))

1261         error ("CPU you selected does not support x86-64 instruction set");

1262

1263       /* Intel CPUs have always interpreted SSE prefetch instructions as

1264         NOPs; so, we can enable SSE prefetch instructions even when

1265         -mtune (rather than -march) points us to a processor that has them.

1266         However, the VIA C3 gives a SIGILL, so we only do that for i686 and

1267         higher processors.  */

1268       if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))

1269         x86_prefetch_sse = true;

1270       break;

1271     }

1272   if (i == pta_size)

1273     error ("bad value (%s) for -mtune= switch", ix86_tune_string);

 

Above, target_flags_explicit is also set in set_target_switch. It records which switches were given explicitly (whether turned on or off) on the command line. Thus the compiler leaves those alone and completes only the remaining options with the knowledge in processor_alias_table.

 

override_options (continue)

 

1275   if (optimize_size)

1276     ix86_cost = &size_cost;

1277   else

1278     ix86_cost = processor_target_table[ix86_tune].cost;

1279   target_flags |= processor_target_table[ix86_tune].target_enable;

1280   target_flags &= ~processor_target_table[ix86_tune].target_disable;

1281

1282   /* Arrange to set up i386_stack_locals for all functions.  */

1283   init_machine_status = ix86_init_machine_status;

1284

1285   /* Validate -mregparm= value.  */

1286   if (ix86_regparm_string)

1287   {

1288     i = atoi (ix86_regparm_string);

1289     if (i < 0 || i > REGPARM_MAX)

1290       error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);

1291     else

1292       ix86_regparm = i;

1293   }

1294   else

1295    if (TARGET_64BIT)

1296      ix86_regparm = REGPARM_MAX;

1297

1298   /* If the user has provided any of the -malign-* options,

1299     warn and use that value only if -falign-* is not set.

1300     Remove this code in GCC 3.2 or later.  */

1301   if (ix86_align_loops_string)

1302   {

1303     warning ("-malign-loops is obsolete, use -falign-loops");

1304     if (align_loops == 0)

1305     {

1306       i = atoi (ix86_align_loops_string);

1307       if (i < 0 || i > MAX_CODE_ALIGN)

1308         error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);

1309       else

1310         align_loops = 1 << i;

1311     }

1312   }

1313

1314   if (ix86_align_jumps_string)

1315   {

1316     warning ("-malign-jumps is obsolete, use -falign-jumps");

1317     if (align_jumps == 0)

1318     {

1319       i = atoi (ix86_align_jumps_string);

1320       if (i < 0 || i > MAX_CODE_ALIGN)

1321         error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);

1322       else

1323         align_jumps = 1 << i;

1324     }

1325   }

1326

1327   if (ix86_align_funcs_string)

1328   {

1329     warning ("-malign-functions is obsolete, use -falign-functions");

1330     if (align_functions == 0)

1331     {

1332       i = atoi (ix86_align_funcs_string);

1333       if (i < 0 || i > MAX_CODE_ALIGN)

1334         error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);

1335       else

1336         align_functions = 1 << i;

1337     }

1338   }

1339

1340   /* Default align_* from the processor table.  */

1341   if (align_loops == 0)

1342   {

1343     align_loops = processor_target_table[ix86_tune].align_loop;

1344     align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;

1345   }

1346   if (align_jumps == 0)

1347   {

1348     align_jumps = processor_target_table[ix86_tune].align_jump;

1349     align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;

1350   }

1351   if (align_functions == 0)

1352   {

1353     align_functions = processor_target_table[ix86_tune].align_func;

1354   }

1355

1356   /* Validate -mpreferred-stack-boundary= value, or provide default.

1357     The default of 128 bits is for Pentium III's SSE __m128, but we

1358     don't want additional code to keep the stack aligned when

1359     optimizing for code size.  */

1360   ix86_preferred_stack_boundary = (optimize_size

1361                        ? TARGET_64BIT ? 128 : 32

1362                        : 128);

1363   if (ix86_preferred_stack_boundary_string)

1364   {

1365     i = atoi (ix86_preferred_stack_boundary_string);

1366     if (i < (TARGET_64BIT ? 4 : 2) || i > 12)

1367       error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,

1368          TARGET_64BIT ? 4 : 2);

1369     else

1370       ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;

1371   }

1372

1373   /* Validate -mbranch-cost= value, or provide default.  */

1374   ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;

1375   if (ix86_branch_cost_string)

1376   {

1377     i = atoi (ix86_branch_cost_string);

1378     if (i < 0 || i > 5)

1379       error ("-mbranch-cost=%d is not between 0 and 5", i);

1380     else

1381       ix86_branch_cost = i;

1382   }

1383

1384   if (ix86_tls_dialect_string)

1385   {

1386     if (strcmp (ix86_tls_dialect_string, "gnu") == 0)

1387       ix86_tls_dialect = TLS_DIALECT_GNU;

1388     else if (strcmp (ix86_tls_dialect_string, "sun") == 0)

1389       ix86_tls_dialect = TLS_DIALECT_SUN;

1390     else

1391       error ("bad value (%s) for -mtls-dialect= switch",

1392          ix86_tls_dialect_string);

1393   }

 

Of the target options referred to in the code above, those available for Intel x86 chips are described below, quoting [6]. REGPARM_MAX is 3 for 32-bit chips, and MAX_CODE_ALIGN is 16, as defined at the beginning of the function.

-mregparm=num

Control how many registers are used to pass integer arguments. By default, no registers are used to pass arguments, and at most 3 registers can be used. You can control this behavior for a specific function by using the function attribute ‘regparm’.

Warning: if you use this switch, and num is nonzero, then you must build all modules with the same value, including any libraries. This includes the system libraries and startup modules.

-mpreferred-stack-boundary=num

Attempt to keep the stack boundary aligned to a 2 raised to num byte boundary. If ‘-mpreferred-stack-boundary’ is not specified, the default is 4 (16 bytes or 128 bits).

On Pentium and PentiumPro, double and long double values should be aligned to an 8 byte boundary (see ‘-malign-double’) or suffer significant run time performance penalties. On Pentium III, the Streaming SIMD Extension (SSE) data type __m128 may not work properly if it is not 16 byte aligned.

To ensure proper alignment of this value on the stack, the stack boundary must be as aligned as that required by any value stored on the stack. Further, every function must be generated such that it keeps the stack aligned. Thus calling a function compiled with a higher preferred stack boundary from a function compiled with a lower preferred stack boundary will most likely misalign the stack. It is recommended that libraries that use callbacks always use the default setting.

This extra alignment does consume extra stack space, and generally increases code size. Code that is sensitive to stack space usage, such as embedded systems and operating system kernels, may want to reduce the preferred alignment to ‘-mpreferred-stack-boundary=2’.

 

override_options (continue)

 

1395   /* Keep nonleaf frame pointers.  */

1396   if (TARGET_OMIT_LEAF_FRAME_POINTER)

1397     flag_omit_frame_pointer = 1;

1398

1399   /* If we're doing fast math, we don't care about comparison order

1400     wrt NaNs. This lets us use a shorter comparison sequence.  */

1401   if (flag_unsafe_math_optimizations)

1402     target_flags &= ~MASK_IEEE_FP;

1403

1404   /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,

1405     since the insns won't need emulation.  */

1406   if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))

1407     target_flags &= ~MASK_NO_FANCY_MATH_387;

1408

1409   /* Turn on SSE2 builtins for -msse3.  */

1410   if (TARGET_SSE3)

1411     target_flags |= MASK_SSE2;

1412

1413   /* Turn on SSE builtins for -msse2.  */

1414   if (TARGET_SSE2)

1415     target_flags |= MASK_SSE;

1416

1417   if (TARGET_64BIT)

1418   {

1419     if (TARGET_ALIGN_DOUBLE)

1420       error ("-malign-double makes no sense in the 64bit mode");

1421     if (TARGET_RTD)

1422       error ("-mrtd calling convention not supported in the 64bit mode");

1423     /* Enable by default the SSE and MMX builtins.  */

1424     target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);

1425     ix86_fpmath = FPMATH_SSE;

1426   }

1427   else

1428   {

1429     ix86_fpmath = FPMATH_387;

1430     /* i386 ABI does not specify red zone. It still makes sense to use it

1431       when programmer takes care to stack from being destroyed.  */

1432     if (!(target_flags_explicit & MASK_NO_RED_ZONE))

1433       target_flags |= MASK_NO_RED_ZONE;

1434   }

1435

1436   if (ix86_fpmath_string != 0)

1437   {

1438     if (! strcmp (ix86_fpmath_string, "387"))

1439       ix86_fpmath = FPMATH_387;

1440     else if (! strcmp (ix86_fpmath_string, "sse"))

1441     {

1442       if (!TARGET_SSE)

1443       {

1444         warning ("SSE instruction set disabled, using 387 arithmetics");

1445         ix86_fpmath = FPMATH_387;

1446       }

1447       else

1448         ix86_fpmath = FPMATH_SSE;

1449     }

1450     else if (! strcmp (ix86_fpmath_string, "387,sse")

1451          || ! strcmp (ix86_fpmath_string, "sse,387"))

1452     {

1453       if (!TARGET_SSE)

1454       {

1455         warning ("SSE instruction set disabled, using 387 arithmetics");

1456         ix86_fpmath = FPMATH_387;

1457       }

1458       else if (!TARGET_80387)

1459       {

1460         warning ("387 instruction set disabled, using SSE arithmetics");

1461         ix86_fpmath = FPMATH_SSE;

1462       }

1463       else

1464         ix86_fpmath = FPMATH_SSE | FPMATH_387;

1465     }

1466     else

1467       error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);

1468   }

 

Above, ix86_fpmath_string holds the value of the following option [6]:

-mfpmath=unit

Generate floating point arithmetics for selected unit. The choices for unit

are:

387  Use the standard 387 floating point coprocessor present majority of chips and emulated otherwise. Code compiled with this option will run almost everywhere. The temporary results are computed in 80bit precision instead of precision specified by the type resulting in slightly different results compared to most of other chips. See ‘-ffloat-store’ for more detailed description. This is the default choice for i386 compiler.

sse  Use scalar floating point instructions present in the SSE instruction set. This instruction set is supported by Pentium3 and newer chips, in the AMD line by Athlon-4, Athlon-xp and Athlon-mp chips. The earlier version of SSE instruction set supports only single precision arithmetics, thus the double and extended precision arithmetics is still done using 387. Later version, present only in Pentium4 and the future AMD x86-64 chips supports double precision arithmetics too.

For the i386 compiler, you need to use ‘-march=cpu-type’, ‘-msse’ or ‘-msse2’ switches to enable SSE extensions and make this option effective. For the x86-64 compiler, these extensions are enabled by default.

The resulting code should be considerably faster in the majority of cases and avoid the numerical instability problems of 387 code, but may break some existing code that expects temporaries to be 80bit. This is the default choice for the x86-64 compiler.

sse,387 Attempt to utilize both instruction sets at once. This effectively double the amount of available registers and on chips with separate execution units for 387 and SSE the execution resources too. Use this option with care, as it is still experimental, because the GCC register allocator does not model separate functional units well resulting in instable performance.

 

override_options (continue)

 

1470   /* It makes no sense to ask for just SSE builtins, so MMX is also turned

1471     on by -msse.  */

1472   if (TARGET_SSE)

1473   {

1474     target_flags |= MASK_MMX;

1475     x86_prefetch_sse = true;

1476   }

1477

1478   /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */

1479   if (TARGET_3DNOW)

1480   {

1481     target_flags |= MASK_MMX;

1482     /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX

1483       extensions it adds.  */

1484     if (x86_3dnow_a & (1 << ix86_arch))

1485       target_flags |= MASK_3DNOW_A;

1486   }

1487   if ((x86_accumulate_outgoing_args & TUNEMASK)

1488       && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)

1489       && !optimize_size)

1490     target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

1491

1492   /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */

1493   {

1494     char *p;

1495     ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);

1496     p = strchr (internal_label_prefix, 'X');

1497     internal_label_prefix_len = p - internal_label_prefix;

1498     *p = '\0';

1499   }

1500 }

 

Above at line 1487, x86_accumulate_outgoing_args is defined as:

 

507    const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;   in i386.c

 

Here, for example, m_ATHLON_K8 is defined as below.

 

470    #define m_K8  (1<<PROCESSOR_K8)                                                           in i386.c

471    #define m_ATHLON_K8  (m_K8 | m_ATHLON)

 

PROCESSOR_K8 is one of the values of enum processor_type. Evidently, variables like x86_accumulate_outgoing_args are bit masks that define the set of chips for which a given characteristic is enabled.

At line 1495, in Linux, ASM_GENERATE_INTERNAL_LABEL is defined as:

 

213    #undef ASM_GENERATE_INTERNAL_LABEL                                        in linux.h

214    #define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM)    \

215      sprintf (LABEL, "*.L%s%ld", PREFIX, (long)(NUM))

 

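So ASM_GENERATE_INTERNAL_LABEL first writes “*.LLX0” into internal_label_prefix; strchr then finds the ‘X’ at offset 4, so internal_label_prefix_len is 4, and at line 1498 ‘\0’ replaces that ‘X’, leaving internal_label_prefix as “*.LL”.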

 

你可能感兴趣的:(String,table,compiler,asynchronous,64bit,loops)