GCC-3.4.6源代码学习笔记(39)

4.1.4. 根据目标平台调整选项

c_common_post_options返回后,继续执行process_options。回忆一下,input_filename访问的是input_location的file域,这个域记录了当前正在编译的文件。

 

process_options (continue)

 

4283   input_filename = main_input_filename;

4284

4285 #ifdef OVERRIDE_OPTIONS

4286   /* Some machines may reject certain combinations of options.  */

4287   OVERRIDE_OPTIONS;

4288 #endif

 

如果后端对于与目标平台相关的编译选项有特定的要求,则定义上面4285行的宏OVERRIDE_OPTIONS,提供自己的处理句柄。对于x86目标平台,这个宏被定义为以下的函数。

 

1050 void

1051 override_options (void)                                                                                    in i386.c

1052 {

1053   int i;

1054   /* Comes from final.c -- no real reason to change it.  */

1055 #define MAX_CODE_ALIGN 16

1056

1057   static struct ptt

1058   {

1059     const struct processor_costs *cost;      /* Processor costs */

1060     const int target_enable;               /* Target flags to enable.  */

1061     const int target_disable;                     /* Target flags to disable.  */

1062     const int align_loop;                  /* Default alignments.  */

1063     const int align_loop_max_skip;

1064     const int align_jump;

1065     const int align_jump_max_skip;

1066     const int align_func;

1067   }

1068   const processor_target_table[PROCESSOR_max] =

1069   {

1070     {&i386_cost, 0, 0, 4, 3, 4, 3, 4},

1071     {&i486_cost, 0, 0, 16, 15, 16, 15, 16},

1072     {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},

1073     {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},

1074     {&k6_cost, 0, 0, 32, 7, 32, 7, 32},

1075     {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},

1076     {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},

1077     {&k8_cost, 0, 0, 16, 7, 16, 7, 16}

1078   };

1079

1080   static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;

1081   static struct pta

1082   {

1083     const char *const name;              /* processor name or nickname.  */

1084     const enum processor_type processor;

1085     const enum pta_flags

1086     {

1087       PTA_SSE = 1,

1088       PTA_SSE2 = 2,

1089       PTA_SSE3 = 4,

1090       PTA_MMX = 8,

1091       PTA_PREFETCH_SSE = 16,

1092       PTA_3DNOW = 32,

1093       PTA_3DNOW_A = 64,

1094       PTA_64BIT = 128

1095     } flags;

1096   }

1097   const processor_alias_table[] =

1098   {

1099     {"i386", PROCESSOR_I386, 0},

1100     {"i486", PROCESSOR_I486, 0},

1101     {"i586", PROCESSOR_PENTIUM, 0},

1102     {"pentium", PROCESSOR_PENTIUM, 0},

1103     {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},

1104     {"winchip-c6", PROCESSOR_I486, PTA_MMX},

1105     {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},

1106     {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},

1107     {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},

1108     {"i686", PROCESSOR_PENTIUMPRO, 0},

1109     {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},

1110     {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},

1111     {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},

1112     {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},

1113     {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},

1114     {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2

1115                             | PTA_MMX | PTA_PREFETCH_SSE},

1116     {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2

1117                              | PTA_MMX | PTA_PREFETCH_SSE},

1118     {"prescott", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3

1119                              | PTA_MMX | PTA_PREFETCH_SSE},

1120     {"nocona", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT

1121                         | PTA_MMX | PTA_PREFETCH_SSE},

1122     {"k6", PROCESSOR_K6, PTA_MMX},

1123     {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},

1124     {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},

1125     {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW

1126                        | PTA_3DNOW_A},

1127     {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE

1128                            | PTA_3DNOW | PTA_3DNOW_A},

1129     {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW

1130                         | PTA_3DNOW_A | PTA_SSE},

1131       {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW

1132                                   | PTA_3DNOW_A | PTA_SSE},

1133       {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW

1134                                   | PTA_3DNOW_A | PTA_SSE},

1135       {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT

1136                             | PTA_SSE | PTA_SSE2 },

1137       {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT

1138                                   | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},

1139       {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT

1140                                   | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},

1141       {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT

1142                                   | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},

1143       {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT

1144                                   | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},

1145     };

1146  

1147     int const pta_size = ARRAY_SIZE (processor_alias_table);

 

上面,processor_target_table和processor_alias_table的类型是在声明变量时就地定义的,因此这些类型不能用于别处。在1080行的TARGET_CPU_DEFAULT_NAMES定义了CPU族。

 

710    #define TARGET_CPU_DEFAULT_NAMES {"i386", "i486", "pentium", "pentium-mmx",\

711                                "pentiumpro", "pentium2", "pentium3", \

712                                "pentium4", "k6", "k6-2", "k6-3",\

713                                "athlon", "athlon-4", "k8", \

714                                "pentium-m", "prescott", "nocona"}

 

1085行的pta_flags描述了特定芯片上可用的寄存器集的属性。

 

override_options (continue)

 

1149   /* Set the default values for switches whose default depends on TARGET_64BIT

1150     in case they weren't overwritten by command line options.  */

1151   if (TARGET_64BIT)

1152   {

1153     if (flag_omit_frame_pointer == 2)

1154       flag_omit_frame_pointer = 1;

1155     if (flag_asynchronous_unwind_tables == 2)

1156       flag_asynchronous_unwind_tables = 1;

1157     if (flag_pcc_struct_return == 2)

1158       flag_pcc_struct_return = 0;

1159   }

1160   else

1161   {

1162     if (flag_omit_frame_pointer == 2)

1163       flag_omit_frame_pointer = 0;

1164     if (flag_asynchronous_unwind_tables == 2)

1165       flag_asynchronous_unwind_tables = 0;

1166     if (flag_pcc_struct_return == 2)

1167       flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;

1168   }

1169

1170 #ifdef SUBTARGET_OVERRIDE_OPTIONS

1171   SUBTARGET_OVERRIDE_OPTIONS;

1172 #endif

1173

1174   if (!ix86_tune_string && ix86_arch_string)

1175     ix86_tune_string = ix86_arch_string;

1176   if (!ix86_tune_string)

1177     ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];

1178   if (!ix86_arch_string)

1179     ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";

1180

1181   if (ix86_cmodel_string != 0)

1182   {

1183     if (!strcmp (ix86_cmodel_string, "small"))

1184       ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;

1185     else if (flag_pic)

1186       sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);

1187     else if (!strcmp (ix86_cmodel_string, "32"))

1188       ix86_cmodel = CM_32;

1189     else if (!strcmp (ix86_cmodel_string, "kernel") && ! flag_pic)

1190       ix86_cmodel = CM_KERNEL;

1191     else if (!strcmp (ix86_cmodel_string, "medium") && ! flag_pic)

1192       ix86_cmodel = CM_MEDIUM;

1193     else if (!strcmp (ix86_cmodel_string, "large") && ! flag_pic)

1194       ix86_cmodel = CM_LARGE;

1195     else

1196       error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);

1197   }

1198   else

1199   {

1200     ix86_cmodel = CM_32;

1201     if (TARGET_64BIT)

1202       ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;

1203   }

1204   if (ix86_asm_string != 0)

1205   {

1206     if (!strcmp (ix86_asm_string, "intel"))

1207       ix86_asm_dialect = ASM_INTEL;

1208     else if (!strcmp (ix86_asm_string, "att"))

1209       ix86_asm_dialect = ASM_ATT;

1210     else

1211       error ("bad value (%s) for -masm= switch", ix86_asm_string);

1212   }

1213   if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))

1214     error ("code model `%s' not supported in the %s bit mode",

1215      ix86_cmodel_string, TARGET_64BIT ? "64" : "32");

1216   if (ix86_cmodel == CM_LARGE)

1217     sorry ("code model `large' not supported yet");

1218   if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))

1219     sorry ("%i-bit mode not compiled in",

1220      (target_flags & MASK_64BIT) ? 64 : 32);

 

上面1167行,对于x86芯片DEFAULT_PCC_STRUCT_RETURN定义为1。而1177行的TARGET_CPU_DEFAULT将选定默认芯片,对于32位芯片,它的值是0(因而选用最普通的名字“i386”)。另在1170行的SUBTARGET_OVERRIDE_OPTIONS对于x86芯片是没有定义的。

对于那些形如ix86_*_string的变量,我们在set_target_switch中已经看到对它们的赋值。【6】对相关的选项给出如下的描述(针对x86体系)。

-mtune=cpu-type

除了ABI及可用指令集,根据cpu-type,对产生的代码作出所有合适的调整。可选择的cpu-type有:

generic 为最通用的IA32/AMD64/EM64T处理器产生优化代码。如果你知道你的代码所运行的CPU,那么你应该使用相应的-mtune选项而不是-mtune=generic。但是,如果你不确切知道你的应用的用户所用的CPU,那么你应该使用这个选项。

因为新的芯片发布后,这个选项的行为将有改变。因此,如果你升级到新版本的GCC,代码生成选项将改变以反映该版本GCC发布时最通用的处理器。

没有选项-march=generic,因为-march表示编译器可使用的指令集,而没有一个通用的指令集可以用于所有的处理器。相对的,-mtune表示代码为该处理器(或者,在我们这种情况下,一组处理器)所优化。

native  通过在编译时刻确定编译机器的处理器类型,选择为之调整代码的CPU。使用-mtune=native将在所选指令集的限制下,产生为本地机器优化的代码。使用-march=native将启用本地机器所支持的所有指令子集(因此编译结果可能无法运行在别的机器上)。

i386   原始的 Intel i386 CPU.

i486   Intel i486 CPU。(GCC没有为该芯片实现指令调度(scheduling)。)

i586, pentium

不支持MMX的Intel Pentium CPU

pentium-mmx

基于Pentium 核心支持MMX指令集的Intel PentiumMMX CPU

pentiumpro

Intel PentiumPro CPU

i686   与generic同义;但当用作-march选项时,将使用PentiumPro指令集,因此代码可以运行在所有i686族的芯片上。

pentium2 基于PentiumPro 核心支持MMX指令集的Intel Pentium2 CPU

pentium3, pentium3m

基于PentiumPro核心,支持MMX和SSE指令集的Intel Pentium3 CPU

pentium-m

支持MMX、SSE和SSE2指令集的低功耗版Intel Pentium3 CPU

pentium4, pentium4m

支持MMX、SSE和SSE2指令集的Intel Pentium4 CPU

prescott 支持MMX、SSE、SSE2和SSE3指令集的改进Intel Pentium4 CPU

nocona 支持MMX、SSE、SSE2、SSE3指令集及64位扩展的改进Intel Pentium4 CPU

core2  支持MMX、SSE、SSE2、SSE3指令集及64位扩展的Intel Core2 CPU

k6     支持MMX指令集的AMD K6 CPU

k6-2, k6-3 支持MMX和3dNOW!指令集的改进AMD K6 CPU

athlon, athlon-tbird

支持MMX、3dNOW!、增强3dNOW!及SSE预取指令的AMD Athlon CPU

athlon-4, athlon-xp, athlon-mp

支持MMX、3dNOW!、增强3dNOW!及完整SSE指令集的改进AMD Athlon CPU

k8, opteron, athlon64, athlon-fx

基于AMD K8核心,支持x86-64指令集的CPU(这是MMX、SSE、SSE2、3dNOW!、增强3dNOW!及64位指令集扩展的超集)。

k8-sse3, opteron-sse3, athlon64-sse3

k8、opteron和athlon64的改进版本,支持SSE3指令集

amdfam10, barcelona

基于AMD Family 10h核心,支持x86-64指令集的CPU(这是MMX、SSE、SSE2、3dNOW!、增强3dNOW!及64位指令集扩展的超集)。

winchip-c6

IDT Winchip C6 CPU,与支持MMX指令集的i486处理相同。

winchip2 IDT Winchip2 CPU,与支持MMX和3dNOW!指令集的i486处理相同。

c3     支持MMX和3dNOW!指令集的C3 CPU(GCC没有为该芯片实现指令调度)。

c3-2   支持MMX和SSE指令集的C3-2 CPU(GCC没有为该芯片实现指令调度)。

geode  支持MMX和3dNOW!指令集的嵌入式AMD CPU

 

虽然选定特定的cpu-type会针对该芯片作出合适的调度安排,但除非使用了-march=cpu-type选项,编译器不会产生无法在i386上运行的代码。

 

-march=cpu-type

为机器类型为cpu-type的机器产生代码。cpu-type的可选值与-mtune选项相同。此外,指定-march=cpu-type隐含-mtune=cpu-type。

 

-mcpu=cpu-type

已过时的-mtune的同义词。

 

-masm=dialect

使用选定的方言输出汇编指令。支持的选择有intel和att(默认值)。Darwin不支持intel。

 

除了上面所说的,在64位环境中,AMD x86-64 处理器还支持以下-m选项。

-m32 -m64

为32位或64位环境产生代码。32位环境设置int、long及指针为32位大小,并产生能在任意i386系统上运行的代码。64位环境设置int为32位大小,而long和指针为64位大小,并为AMD的x86-64架构产生代码。对于darwin,-m64选项会关闭-fno-pic和-mdynamic-no-pic选项。

-mno-red-zone

对于x86-64的代码,不要使用所谓的红区(red zone)。红区是x86-64 ABI的要求,它是一个在栈指针位置以外,不会被信号及异常句柄所修改的128字节大小的区域,因此它可被用于保存临时变量而不需调整栈指针。设置标识符-mno-red-zone,则禁用红区。

-mcmodel=small

为小代码模式产生代码:程序及其符号必须被链接入地址空间的低2 GB部分。指针是64 位大小。程序可以被静态或动态链接。这是默认的代码模式(the default code model)。

-mcmodel=kernel

为内核代码模式产生代码。内核运行在地址空间的高2 GB部分。这个模式为Linux内核代码使用。

-mcmodel=medium

为中等模式产生代码:程序被链接入地址空间的低2 GB,但符号可以位于地址空间的任一处。程序可以被静态或动态链接,但中等模式不支持创建共享库。

-mcmodel=large

为大模式产生代码:这个模式不对段的大小及地址做任何假设。

上面的段落解释了下面cmodel的含义。

 

108    enum cmodel {                                                                                        in i386.h

109      CM_32,    /* The traditional 32-bit ABI.  */

110       CM_SMALL,   /* Assumes all code and data fits in the low 31 bits.  */

111       CM_KERNEL,       /* Assumes all code and data fits in the high 31 bits.  */

112       CM_MEDIUM,       /* Assumes code fits in the low 31 bits; data unlimited.  */

113       CM_LARGE,   /* No assumptions.  */

114       CM_SMALL_PIC   /* Assumes code+data+got/plt fits in a 31 bit region.  */

115     };

 

override_options (continue)

 

1222   for (i = 0; i < pta_size; i++)

1223     if (! strcmp (ix86_arch_string, processor_alias_table[i].name))

1224     {

1225       ix86_arch = processor_alias_table[i].processor;

1226       /* Default cpu tuning to the architecture.  */

1227       ix86_tune = ix86_arch;

1228       if (processor_alias_table[i].flags & PTA_MMX

1229          && !(target_flags_explicit & MASK_MMX))

1230         target_flags |= MASK_MMX;

1231       if (processor_alias_table[i].flags & PTA_3DNOW

1232          && !(target_flags_explicit & MASK_3DNOW))

1233         target_flags |= MASK_3DNOW;

1234       if (processor_alias_table[i].flags & PTA_3DNOW_A

1235          && !(target_flags_explicit & MASK_3DNOW_A))

1236         target_flags |= MASK_3DNOW_A;

1237       if (processor_alias_table[i].flags & PTA_SSE

1238          && !(target_flags_explicit & MASK_SSE))

1239         target_flags |= MASK_SSE;

1240       if (processor_alias_table[i].flags & PTA_SSE2

1241          && !(target_flags_explicit & MASK_SSE2))

1242         target_flags |= MASK_SSE2;

1243       if (processor_alias_table[i].flags & PTA_SSE3

1244          && !(target_flags_explicit & MASK_SSE3))

1245         target_flags |= MASK_SSE3;

1246       if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)

1247         x86_prefetch_sse = true;

1248       if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))

1249         error ("CPU you selected does not support x86-64 instruction set");

1250       break;

1251     }

1252

1253   if (i == pta_size)

1254     error ("bad value (%s) for -march= switch", ix86_arch_string);

1255

1256   for (i = 0; i < pta_size; i++)

1257     if (! strcmp (ix86_tune_string, processor_alias_table[i].name))

1258     {

1259       ix86_tune = processor_alias_table[i].processor;

1260       if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))

1261         error ("CPU you selected does not support x86-64 instruction set");

1262

1263       /* Intel CPUs have always interpreted SSE prefetch instructions as

1264         NOPs; so, we can enable SSE prefetch instructions even when

1265         -mtune (rather than -march) points us to a processor that has them.

1266         However, the VIA C3 gives a SIGILL, so we only do that for i686 and

1267         higher processors.  */

1268       if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))

1269         x86_prefetch_sse = true;

1270       break;

1271     }

1272   if (i == pta_size)

1273     error ("bad value (%s) for -mtune= switch", ix86_tune_string);

 

上面target_flags_explicit也是在set_target_switch中设置的。它记录了特定的选项是否由用户在命令行上显式指定。因此,对于用户没有显式指定的选项,编译器可以根据processor_alias_table的知识进行自动补齐。

 

override_options (continue)

 

1275   if (optimize_size)

1276     ix86_cost = &size_cost;

1277   else

1278     ix86_cost = processor_target_table[ix86_tune].cost;

1279   target_flags |= processor_target_table[ix86_tune].target_enable;

1280   target_flags &= ~processor_target_table[ix86_tune].target_disable;

1281

1282   /* Arrange to set up i386_stack_locals for all functions.  */

1283   init_machine_status = ix86_init_machine_status;

1284

1285   /* Validate -mregparm= value.  */

1286   if (ix86_regparm_string)

1287   {

1288     i = atoi (ix86_regparm_string);

1289     if (i < 0 || i > REGPARM_MAX)

1290       error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);

1291     else

1292       ix86_regparm = i;

1293   }

1294   else

1295    if (TARGET_64BIT)

1296      ix86_regparm = REGPARM_MAX;

1297

1298   /* If the user has provided any of the -malign-* options,

1299     warn and use that value only if -falign-* is not set.

1300     Remove this code in GCC 3.2 or later.  */

1301   if (ix86_align_loops_string)

1302   {

1303     warning ("-malign-loops is obsolete, use -falign-loops");

1304     if (align_loops == 0)

1305     {

1306       i = atoi (ix86_align_loops_string);

1307       if (i < 0 || i > MAX_CODE_ALIGN)

1308         error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);

1309       else

1310         align_loops = 1 << i;

1311     }

1312   }

1313

1314   if (ix86_align_jumps_string)

1315   {

1316     warning ("-malign-jumps is obsolete, use -falign-jumps");

1317     if (align_jumps == 0)

1318     {

1319       i = atoi (ix86_align_jumps_string);

1320       if (i < 0 || i > MAX_CODE_ALIGN)

1321         error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);

1322       else

1323         align_jumps = 1 << i;

1324     }

1325   }

1326

1327   if (ix86_align_funcs_string)

1328   {

1329     warning ("-malign-functions is obsolete, use -falign-functions");

1330     if (align_functions == 0)

1331     {

1332       i = atoi (ix86_align_funcs_string);

1333       if (i < 0 || i > MAX_CODE_ALIGN)

1334         error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);

1335       else

1336         align_functions = 1 << i;

1337     }

1338   }

1339

1340   /* Default align_* from the processor table.  */

1341   if (align_loops == 0)

1342   {

1343     align_loops = processor_target_table[ix86_tune].align_loop;

1344     align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;

1345   }

1346   if (align_jumps == 0)

1347   {

1348     align_jumps = processor_target_table[ix86_tune].align_jump;

1349     align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;

1350   }

1351   if (align_functions == 0)

1352   {

1353     align_functions = processor_target_table[ix86_tune].align_func;

1354   }

1355

1356   /* Validate -mpreferred-stack-boundary= value, or provide default.

1357     The default of 128 bits is for Pentium III's SSE __m128, but we

1358     don't want additional code to keep the stack aligned when

1359     optimizing for code size.  */

1360   ix86_preferred_stack_boundary = (optimize_size

1361                        ? TARGET_64BIT ? 128 : 32

1362                        : 128);

1363   if (ix86_preferred_stack_boundary_string)

1364   {

1365     i = atoi (ix86_preferred_stack_boundary_string);

1366     if (i < (TARGET_64BIT ? 4 : 2) || i > 12)

1367       error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,

1368          TARGET_64BIT ? 4 : 2);

1369     else

1370       ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;

1371   }

1372

1373   /* Validate -mbranch-cost= value, or provide default.  */

1374   ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;

1375   if (ix86_branch_cost_string)

1376   {

1377     i = atoi (ix86_branch_cost_string);

1378     if (i < 0 || i > 5)

1379       error ("-mbranch-cost=%d is not between 0 and 5", i);

1380     else

1381       ix86_branch_cost = i;

1382   }

1383

1384   if (ix86_tls_dialect_string)

1385   {

1386     if (strcmp (ix86_tls_dialect_string, "gnu") == 0)

1387       ix86_tls_dialect = TLS_DIALECT_GNU;

1388     else if (strcmp (ix86_tls_dialect_string, "sun") == 0)

1389       ix86_tls_dialect = TLS_DIALECT_SUN;

1390     else

1391       error ("bad value (%s) for -mtls-dialect= switch",

1392          ix86_tls_dialect_string);

1393   }

 

对于Intel x86,上面代码所引用的目标选项中,那些在Intel芯片上可用的,由【6】描述如下。另外,REGPARM_MAX对于32位芯片是3,而MAX_CODE_ALIGN在函数开头被定义为16。

-mregparm=num

控制多少个寄存器用于传递整型参数。默认情况下,没有寄存器用于传递参数,并且最多可以使用3个寄存器。可以通过使用函数属性regparm来控制特定函数的这个行为。

警告:如果使用这个选项,并且num不为0,那么必须使用同样的值来编译所有的模块,包括所有的库。这里面包括系统库及启动模块。

-mpreferred-stack-boundary=num

尝试保持栈对齐在2的num次方字节的边界。如果没有指明-mpreferred-stack-boundary,默认值是4(16字节或128比特)。

在Pentium和PentiumPro上,double和long double类型的值应该对齐在8字节的边界(参考-malign-double),否则将忍受显著的性能下降。在Pentium III上,SIMD扩展(SSE)流数据类型__m128如果不在16字节上对齐,可能不能正常工作。

为了确保在栈中这个值的正确对齐,栈的边界必须满足栈中保存的任意值的对齐要求。更甚,每个产生的函数都需要保证栈被对齐。因此,由一个使用较小栈边界编译的函数调用一个使用较大栈边界编译的函数,很可能会导致栈失调(misalign)。建议使用回调的库应一直使用默认设置。

这个额外的对齐会消耗额外的栈空间,并且通常会增加代码大小。对栈空间的使用敏感的代码,比如嵌入式系统及操作系统内核,可能希望减少期望的对齐到-mpreferred-stack-boundary=2

 

override_options (continue)

 

1395   /* Keep nonleaf frame pointers.  */

1396   if (TARGET_OMIT_LEAF_FRAME_POINTER)

1397     flag_omit_frame_pointer = 1;

1398

1399   /* If we're doing fast math, we don't care about comparison order

1400     wrt NaNs. This lets us use a shorter comparison sequence.  */

1401   if (flag_unsafe_math_optimizations)

1402     target_flags &= ~MASK_IEEE_FP;

1403

1404   /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,

1405     since the insns won't need emulation.  */

1406   if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))

1407     target_flags &= ~MASK_NO_FANCY_MATH_387;

1408

1409   /* Turn on SSE2 builtins for -msse3.  */

1410   if (TARGET_SSE3)

1411     target_flags |= MASK_SSE2;

1412

1413   /* Turn on SSE builtins for -msse2.  */

1414   if (TARGET_SSE2)

1415     target_flags |= MASK_SSE;

1416

1417   if (TARGET_64BIT)

1418   {

1419     if (TARGET_ALIGN_DOUBLE)

1420       error ("-malign-double makes no sense in the 64bit mode");

1421     if (TARGET_RTD)

1422       error ("-mrtd calling convention not supported in the 64bit mode");

1423     /* Enable by default the SSE and MMX builtins.  */

1424     target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);

1425     ix86_fpmath = FPMATH_SSE;

1426   }

1427   else

1428   {

1429     ix86_fpmath = FPMATH_387;

1430     /* i386 ABI does not specify red zone. It still makes sense to use it

1431       when programmer takes care to stack from being destroyed.  */

1432     if (!(target_flags_explicit & MASK_NO_RED_ZONE))

1433       target_flags |= MASK_NO_RED_ZONE;

1434   }

1435

1436   if (ix86_fpmath_string != 0)

1437   {

1438     if (! strcmp (ix86_fpmath_string, "387"))

1439       ix86_fpmath = FPMATH_387;

1440     else if (! strcmp (ix86_fpmath_string, "sse"))

1441     {

1442       if (!TARGET_SSE)

1443       {

1444         warning ("SSE instruction set disabled, using 387 arithmetics");

1445         ix86_fpmath = FPMATH_387;

1446       }

1447       else

1448         ix86_fpmath = FPMATH_SSE;

1449     }

1450     else if (! strcmp (ix86_fpmath_string, "387,sse")

1451          || ! strcmp (ix86_fpmath_string, "sse,387"))

1452     {

1453       if (!TARGET_SSE)

1454       {

1455         warning ("SSE instruction set disabled, using 387 arithmetics");

1456         ix86_fpmath = FPMATH_387;

1457       }

1458       else if (!TARGET_80387)

1459       {

1460         warning ("387 instruction set disabled, using SSE arithmetics");

1461         ix86_fpmath = FPMATH_SSE;

1462       }

1463       else

1464         ix86_fpmath = FPMATH_SSE | FPMATH_387;

1465     }

1466     else

1467       error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);

1468   }

 

上面的ix86_fpmath_string用于下列选项【6】。

-mfpmath=unit

为选定单元产生浮点算术。单元的选择有:

387  使用在大多数芯片及模拟器都存在的标准387浮点协处理器。用此选项编译的代码几乎可以运行在任何地方。临时结果以80位精度计算,而不是以类型所指定的精度。与其他大部分芯片比较,这将导致微小的差异。更多细节,参考-ffloat-store。对于i386编译器,这是默认的选择。

sse  使用SSE指令集中的纯量浮点指令(scalar floating point instruction)。这个指令集为Pentium3及更新的芯片支持,在AMD系列,则为Athlon-4、Athlon-xp和Athlon-mp支持。更早版本的SSE指令集仅支持单精度算术,因此双精度及扩展精度仍需使用387。更新的版本,仅出现在Pentium4及未来的AMD x86-64芯片,支持双精度算术。

对于i386编译器,需要使用-march=cpu-type、-msse或-msse2选项来启用SSE扩展,以使得该选项生效。对于x86-64编译器,这些扩展默认就是生效的。

在大多数情况下,生成的代码要快得多,并且避免了387代码数值不稳定的问题,但可能会破坏一些期望临时结果为80位的现存代码。这是x86-64编译器的默认的选择

sse,387 尝试同时使用两套指令集。这有效地加倍了可用的寄存器,而且在具有分立的387和SSE执行单元的芯片上,亦增加了执行资源。使用该选择需小心,因为它仍在试验中,GCC寄存器分配器不能很好地为分立功能单元建模,因而会导致性能不稳定。

 

override_options (continue)

 

1470   /* It makes no sense to ask for just SSE builtins, so MMX is also turned

1471     on by -msse.  */

1472   if (TARGET_SSE)

1473   {

1474     target_flags |= MASK_MMX;

1475     x86_prefetch_sse = true;

1476   }

1477

1478   /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */

1479   if (TARGET_3DNOW)

1480   {

1481     target_flags |= MASK_MMX;

1482     /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX

1483       extensions it adds.  */

1484     if (x86_3dnow_a & (1 << ix86_arch))

1485       target_flags |= MASK_3DNOW_A;

1486   }

1487   if ((x86_accumulate_outgoing_args & TUNEMASK)

1488       && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)

1489       && !optimize_size)

1490     target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

1491

1492   /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */

1493   {

1494     char *p;

1495     ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);

1496     p = strchr (internal_label_prefix, 'X');

1497     internal_label_prefix_len = p - internal_label_prefix;

1498     *p = '\0';

1499   }

1500 }

 

在上面的1487行,x86_accumulate_outgoing_args定义如下:

 

507    const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;   in i386.c

 

其中,例如,m_ATHLON_K8具有以下定义。

 

470    #define m_K8  (1<<PROCESSOR_K8)                                                           in i386.c

471    #define m_ATHLON_K8  (m_K8 | m_ATHLON)

 

PROCESSOR_K8是枚举类型processor_type的其中一个值。显然,形如x86_accumulate_outgoing_args的变量,是以位掩码的形式定义了具有指定特性的芯片集合。

1495行,在Linux上,ASM_GENERATE_INTERNAL_LABEL被定义为:

 

213    #undef ASM_GENERATE_INTERNAL_LABEL                                        in linux.h

214    #define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM)    \

215      sprintf (LABEL, "*.L%s%ld", PREFIX, (long)(NUM))

 

因此sprintf产生的internal_label_prefix是“*.LLX0”;1496行的strchr找到其中的‘X’,1498行用‘\0’替换这个‘X’,于是internal_label_prefix最终是“*.LL”,而internal_label_prefix_len为4。

 

你可能感兴趣的:(String,table,asynchronous,64bit,X86,loops)