Studying note of GCC-3.4.6 source (26 cont1)

common_handle_option (continue)

 

909      case OPT_fcall_used_:

910        fix_register (arg, 0, 1);

911        break;

912 

913      case OPT_fcall_saved_:

914        fix_register (arg, 0, 0);

915        break;

916 

917      case OPT_fcaller_saves:

918        flag_caller_saves = value;

919        break;

920 

921      case OPT_fcommon:

922        flag_no_common = !value;

923        break;

924 

925      case OPT_fcprop_registers:

926        flag_cprop_registers = value;

927        break;

928 

929      case OPT_fcrossjumping:

930        flag_crossjumping = value;

931        break;

932 

933      case OPT_fcse_follow_jumps:

934        flag_cse_follow_jumps = value;

935        break;

936 

937      case OPT_fcse_skip_blocks:

938        flag_cse_skip_blocks = value;

939        break;

940 

941      case OPT_fdata_sections:

942        flag_data_sections = value;

943        break;

944 

945      case OPT_fdefer_pop:

946        flag_defer_pop = value;

947        break;

948 

949      case OPT_fdelayed_branch:

950        flag_delayed_branch = value;

951        break;

952 

953      case OPT_fdelete_null_pointer_checks:

954        flag_delete_null_pointer_checks = value;

955        break;

 

Switch -fcall-used-`register` treats the named register as a register that is available for allocation, but will have its contents destroyed by a function call. It may be allocated for temporary storage, but must be reloaded after a function call. This option must not be specified for registers that have fixed roles (such as a frame pointer or stack pointer). The register names are platform dependent and are named in the REGISTER_NAMES macro of the machine description.

And switch -fcall-saved-`register` treats the named register as a register that can be allocated to contain a value, and that value will be retained even through a function call. Functions compiled with this option set must save and restore the contents of the register. This option must not be specified for registers that have fixed roles (such as a frame pointer or stack pointer). The register names are platform dependent and are named in the REGISTER_NAMES macro of the machine description.

These switches are handled by fix_register. Notice the call_used parameter passed to the function.

 

720  void

721  fix_register (const char *name, int fixed, int call_used)                                         in regclass.c

722  {

723    int i;

724 

725    /* Decode the name and update the primary form of

726      the register info.  */

727 

728    if ((i = decode_reg_name (name)) >= 0)

729    {

730      if ((i == STACK_POINTER_REGNUM

731  #ifdef HARD_FRAME_POINTER_REGNUM

732         || i == HARD_FRAME_POINTER_REGNUM

733  #else

734         || i == FRAME_POINTER_REGNUM

735  #endif

736         )

737         && (fixed == 0 || call_used == 0))

738      {

739        static const char * const what_option[2][2] = {

740            { "call-saved", "call-used" },

741            { "no-such-option", "fixed" }};

742 

743        error ("can't use '%s' as a %s register", name,

744             what_option[fixed][call_used]);

745      }

746      else

747      {

748        fixed_regs[i] = fixed;

749        call_used_regs[i] = call_used;

750  #ifdef CALL_REALLY_USED_REGISTERS

751        if (fixed == 0)

752          call_really_used_regs[i] = call_used;

753  #endif

754      }

755    }

756    else

757    {

758      warning ("unknown register name: %s", name);

759    }

760  }

 

decode_reg_name translates the register’s name into its hard register number. Some assembly languages, like the one used on Linux, use a ‘%’ prefix when referring to registers; at line 681 below, strip_reg_name gets rid of this prefix. Notice that we can use either a decimal number or a literal name to select the register. A literal name is matched against reg_names, which contains the names from REGISTER_NAMES.

 

673  int

674  decode_reg_name (const char *asmspec)                                                       in varasm.c

675  {

676    if (asmspec != 0)

677   {

678      int i;

679 

680      /* Get rid of confusing prefixes.  */

681      asmspec = strip_reg_name (asmspec);

682 

683      /* Allow a decimal number as a "register name".  */

684      for (i = strlen (asmspec) - 1; i >= 0; i--)

685        if (! ISDIGIT (asmspec[i]))

686          break;

687      if (asmspec[0] != 0 && i < 0)

688      {

689        i = atoi (asmspec);

690        if (i < FIRST_PSEUDO_REGISTER && i >= 0)

691          return i;

692        else

693          return -2;

694      }

695 

696      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)

697        if (reg_names[i][0]

698           && ! strcmp (asmspec, strip_reg_name (reg_names [i])))

699          return i;

700 

701  #ifdef ADDITIONAL_REGISTER_NAMES

702      {

703        static const struct { const char *const name; const int number; } table[]

704             = ADDITIONAL_REGISTER_NAMES;

705 

706        for (i = 0; i < (int) ARRAY_SIZE (table); i++)

707          if (! strcmp (asmspec, table[i].name))

708            return table[i].number;

709      }

710  #endif /* ADDITIONAL_REGISTER_NAMES */

711 

712      if (!strcmp (asmspec, "memory"))

713        return -4;

714 

715      if (!strcmp (asmspec, "cc"))

716       return -3;

717 

718      return -2;

719    }

720 

721    return -1;

722  }

 

For x86 machine, REGISTER_NAMES defines names of basic register set. This part of registers can be indexed by compiler's hard-register-number (decimal number).

 

2746 #define HI_REGISTER_NAMES                            /                                         in i386.h

2747 {"ax","dx","cx","bx","si","di","bp","sp",                        /

2748  "st","st(1)","st(2)","st(3)","st(4)","st(5)","st(6)","st(7)",        /

2749  "argp", "flags", "fpsr", "dirflag", "frame",                           /

2750  "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7",            /

2751  "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"      ,             /

2752  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",                     /

2753  "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"}

2754

2755 #define REGISTER_NAMES HI_REGISTER_NAMES

 

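Further, for x86 machines, ADDITIONAL_REGISTER_NAMES defines additional register names for the platform. These names cannot be found by indexing reg_names with the compiler's hard register number; instead, each table entry pairs a name with the register number that decode_reg_name returns for it (line 708).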

 

2759 #define ADDITIONAL_REGISTER_NAMES /                                            in i386.h

2760 { { "eax", 0 }, { "edx", 1 }, { "ecx", 2 }, { "ebx", 3 },     /

2761   { "esi", 4 }, { "edi", 5 }, { "ebp", 6 }, { "esp", 7 },     /

2762   { "rax", 0 }, { "rdx", 1 }, { "rcx", 2 }, { "rbx", 3 },    /

2763   { "rsi", 4 }, { "rdi", 5 }, { "rbp", 6 }, { "rsp", 7 },      /

2764   { "al", 0 }, { "dl", 1 }, { "cl", 2 }, { "bl", 3 },            /

2765   { "ah", 0 }, { "dh", 1 }, { "ch", 2 }, { "bh", 3 },         /

2766   { "mm0", 8},  { "mm1", 9},  { "mm2", 10}, { "mm3", 11},    /

2767   { "mm4", 12}, { "mm5", 13}, { "mm6", 14}, { "mm7", 15} }

 

Then in fix_register, at line 748, fixed_regs is indexed by hard register number; it contains 1 in the entry of each register that has a fixed use. These are the registers that cannot be used to allocate a pseudo register for general use.

At line 749, call_used_regs is indexed by hard register number; it contains 1 in the entry of each register that has a fixed use or is clobbered by function calls. These are the registers that cannot be used to allocate a pseudo register whose life crosses calls unless we are able to save/restore them across the calls.

At line 750, CALL_REALLY_USED_REGISTERS is not defined on our target platform, so call_really_used_regs is not declared either.

Then, back in common_handle_option, at line 922, flag_no_common is set from -fcommon. Specifying -fno-common will cause the compiler to make an explicit allocation of space in the data section for each global variable. The default is to allocate them in a common block that is resolved by the linker, so declaring the same global variable more than once causes the linker to resolve the two into one. You can specify -fno-common to verify that the program will compile and link on another system that does not use GCC.

And at line 942, flag_data_sections (-fdata-sections), if nonzero, each data item is placed into its own named section in the assembly language output. The section name is derived from the name of the data item. This only has an advantage on machines with a linker that can use sectioning to optimize allocation of space. For the same optimization with the executable code, see -ffunction-sections. Setting this option for a machine that does not support sectioning in its assembler code will result in a warning message, and the option will be ignored. Even on machines that support such sectioning there will be no advantage unless that linker uses the organization for optimization. In fact, it could have a detrimental effect by making the object code larger and slower to load.

If the -p option is set for profiling, this option (-fdata-sections) will have no effect. Also, because of the rearrangement of the code, you may have problems with the -g option and debugging.

And at line 950, flag_delayed_branch (-fdelayed-branch), if nonzero, this flag only has effect on machines with delayed branch slots. This has to do with loading and executing instructions at the same time the decision is being made whether to take a branch. After the decision is made the result of the instruction may be discarded depending on the location of the instruction and the decision made. This flag is set by every level of optimization if the target machine supports it, but it can be overridden by -fno-delayed-branch.

 

common_handle_option (continue)

 

957       case OPT_fdiagnostics_show_location_:

958         if (!strcmp (arg, "once"))

959           diagnostic_prefixing_rule (global_dc) = DIAGNOSTICS_SHOW_PREFIX_ONCE;

960         else if (!strcmp (arg, "every-line"))

961           diagnostic_prefixing_rule (global_dc)

962               = DIAGNOSTICS_SHOW_PREFIX_EVERY_LINE;

963         else

964           return 0;

965         break;

966 

967       case OPT_fdump_unnumbered:

968         flag_dump_unnumbered = value;

969         break;

970 

971       case OPT_feliminate_dwarf2_dups:

972         flag_eliminate_dwarf2_dups = value;

973         break;

974 

975       case OPT_feliminate_unused_debug_types:

976         flag_eliminate_unused_debug_types = value;

977         break;

978 

979         case OPT_feliminate_unused_debug_symbols:

980           flag_debug_only_used_symbols = value;

981           break;

982       

983         case OPT_fexceptions:

984           flag_exceptions = value;

985           break;

986       

987         case OPT_fexpensive_optimizations:

988           flag_expensive_optimizations = value;

989           break;

990       

991         case OPT_ffast_math:

992           set_fast_math_flags (value);

993           break;

994       

995         case OPT_ffinite_math_only:

996           flag_finite_math_only = value;

997           break;

998       

999      case OPT_ffixed_:

1000         fix_register (arg, 1, 1);

1001       break;

1002

1003     case OPT_ffunction_cse:

1004       flag_no_function_cse = !value;

1005       break;

1006

1007     case OPT_ffloat_store:

1008       flag_float_store = value;

1009       break;

1010

1011     case OPT_fforce_addr:

1012       flag_force_addr = value;

1013       break;

1014

1015     case OPT_fforce_mem:

1016       flag_force_mem = value;

1017       break;

1018

1019     case OPT_ffunction_sections:

1020       flag_function_sections = value;

1021       break;

1022

1023     case OPT_fgcse:

1024       flag_gcse = value;

1025       break;

1026

1027     case OPT_fgcse_lm:

1028       flag_gcse_lm = value;

1029       break;

1030

1031     case OPT_fgcse_sm:

1032       flag_gcse_sm = value;

1033       break;

1034

1035     case OPT_fgcse_las:

1036       flag_gcse_las = value;

1037       break;

1038

1039     case OPT_fguess_branch_probability:

1040       flag_guess_branch_prob = value;

1041       break;

1042

1043     case OPT_fident:

1044       flag_no_ident = !value;

1045       break;

1046

1047     case OPT_fif_conversion:

1048       flag_if_conversion = value;

1049       break;

1050

1051     case OPT_fif_conversion2:

1052       flag_if_conversion2 = value;

1053       break;

1054

1055     case OPT_finhibit_size_directive:

1056       flag_inhibit_size_directive = value;

1057       break;

1058

1059     case OPT_finline:

1060       flag_no_inline = !value;

1061       break;

1062

1063     case OPT_finline_functions:

1064       flag_inline_functions = value;

1065       break;

 

This code snippet uses the following variables.

flag_dump_unnumbered (-fdump-unnumbered), when debugging the compiler with the -d option, this option will suppress instruction numbers and line numbers in the output files, which makes it easier to use diff to compare dumps.

flag_eliminate_dwarf2_dups (-feliminate-dwarf2-dups) if nonzero, doing dwarf2 duplicate elimination.

flag_eliminate_unused_debug_types (-feliminate-unused-debug-types) if nonzero, doing unused type elimination.

flag_debug_only_used_symbols (-feliminate-unused-debug-symbols) if nonzero, means emit debugging information only for symbols which are used.

flag_exceptions (-fexceptions) if nonzero, enables exception handling. This option generates the extra code necessary to process the throwing and catching of exceptions. If you do not specify this option, it will be specified automatically for languages such as Ada, Java, and C++ that normally throw exceptions.

flag_finite_math_only (-ffinite-math-only) if nonzero, means that no NaNs or +-Infs are expected.

flag_no_function_cse (-fno-function-cse) if nonzero, will cause each instruction making a call to explicitly include the address of the function. If zero (-ffunction-cse), function calls are made with the function address stored in a register, which produces more efficient code.

flag_float_store (-ffloat-store) if nonzero, doesn’t allocate registers to hold floating point values. On some machines the registers extend the precision beyond that defined for the language, and thus carry more precision than the floating point data stored in memory. This flag is only useful if your program must restrict its precision to exactly that defined by the IEEE standard.

flag_force_addr (-fforce-addr) if nonzero, addresses must be copied into registers before arithmetic is performed on them. This improves the generated code because addresses needed will often have been previously loaded into a register and do not need to be loaded again.

flag_function_sections (-ffunction-sections) if nonzero, each function is placed into its own named section in the assembly language output. The section name is derived from the function name. This only has an advantage on machines with a linker that can use sectioning to optimize allocation of space. For the same optimization with data, see -fdata-sections. Setting this option for a machine that does not support sectioning in its assembler code will result in a warning message, and the option will be ignored. Even on machines that support such sectioning there will be no advantage unless that linker uses the organization for optimization. In fact, it could have a detrimental effect by making the object code larger and slower to load.

If the -p option is set for profiling, this option will have no effect. Also, because of the rearrangement of the code, you may have problems with the -g option and debugging.

flag_gcse_lm (-fgcse-lm) if nonzero, performs global common subexpression elimination optimization by detecting load and store operations inside a loop, in which the load operation is in a form that can be moved in front of the loop and thus only occur once. This is the default, but will have no effect except when -Os is set. It can be overridden with -fno-gcse-lm.

flag_gcse_sm (-fgcse-sm) if nonzero, performs global common subexpression elimination optimization by detecting load and store operations inside a loop, in which the store operation is in a form that can be moved after the loop and thus only occur once. This is the default, but will have no effect except when -Os is set. It can be overridden with -fno-gcse-sm.

flag_gcse_las (-fgcse-las) if nonzero, the global common subexpression elimination pass eliminates redundant loads that come after stores to the same memory location (both partial and full redundancies).

flag_no_ident (-fno-ident) if nonzero, means ignore #ident directives. The #ident directive takes one argument, a string constant. On some systems, that string constant is copied into a special segment of the object file. On other systems, the directive is ignored.

flag_inhibit_size_directive (-finhibit-size-directive) if nonzero, doesn’t output a .size assembler directive, nor any other directive that could cause problems if the function is split and the two halves are separated in memory. This option is a special case used to compile crtstuff.c (part of GCC) and is not expected to have any other purpose.

For -ffast-math, certain mathematical calculations are made faster by violating some of the ISO and IEEE rules. For example, with this option set it is assumed that no negative values are passed to sqrt() and that all floating point values are valid. Setting this option causes the preprocessor macro __FAST_MATH__ to be defined.

This switch is handled by set_fast_math_flags.

 

1590 void

1591 set_fast_math_flags (int set)                                                                              in opts.c

1592 {

1593   flag_trapping_math = !set;

1594   flag_unsafe_math_optimizations = set;

1595   flag_finite_math_only = set;

1596   flag_errno_math = !set;

1597   if (set)

1598   {

1599     flag_signaling_nans = 0;

1600     flag_rounding_math = 0;

1601   }

1602 }

 

In this function,

flag_trapping_math (-ftrapping-math) if zero, means that floating-point math operations cannot generate a (user-visible) trap. This is the case, for example, in nonstop IEEE 754 arithmetic. Trapping conditions include division by zero, overflow, underflow, invalid and inexact, but does not include operations on signaling NaNs.

flag_unsafe_math_optimizations (-funsafe-math-optimizations) if nonzero, means that unsafe floating-point math optimizations are allowed for the sake of speed. IEEE compliance is not guaranteed, and operations are allowed to assume that their arguments and results are "normal" (e.g., nonnegative for SQRT).

flag_errno_math, if nonzero, means the front end generally wants `errno' maintained by math operations, like built-in SQRT.

flag_signaling_nans (-fsignaling-nans) if nonzero, means disable transformations observable by signaling NaNs. This option implies that any operation on an IEEE signaling NaN can generate a (user-visible) trap.

flag_rounding_math (-frounding-math) if nonzero, means disable transformations that assume default floating point rounding behavior.

 

common_handle_option (continue)

 

1067     case OPT_finline_limit_:

1068     case OPT_finline_limit_eq:

1069       set_param_value ("max-inline-insns-single", value / 2);

1070       set_param_value ("max-inline-insns-auto", value / 2);

1071       set_param_value ("max-inline-insns-rtl", value);

1072       break;

1073

1074     case OPT_finstrument_functions:

1075       flag_instrument_function_entry_exit = value;

1076       break;

1077

1078     case OPT_fkeep_inline_functions:

1079       flag_keep_inline_functions =value;

1080       break;

1081

1082     case OPT_fkeep_static_consts:

1083       flag_keep_static_consts = value;

1084       break;

1085

1086     case OPT_fleading_underscore:

1087       flag_leading_underscore = value;

1088       break;

1089

1090     case OPT_floop_optimize:

1091       flag_loop_optimize = value;

1092       break;

1093

1094     case OPT_fmath_errno:

1095       flag_errno_math = value;

1096       break;

1097

1098     case OPT_fmem_report:

1099       mem_report = value;

1100       break;

1101

1102     case OPT_fmerge_all_constants:

1103       flag_merge_constants = value + value;

1104       break;

1105

1106     case OPT_fmerge_constants:

1107       flag_merge_constants = value;

1108       break;

1109

1110     case OPT_fmessage_length_:

1111       pp_set_line_maximum_length (global_dc->printer, value);

1112       break;

1113

1114     case OPT_fmove_all_movables:

1115       flag_move_all_movables = value;

1116       break;

1117

1118     case OPT_fnew_ra:

1119       flag_new_regalloc = value;

1120       break;

1121

1122     case OPT_fnon_call_exceptions:

1123       flag_non_call_exceptions = value;

1124       break;

1125

1126     case OPT_fold_unroll_all_loops:

1127       flag_old_unroll_all_loops = value;

1128       break;

1129

1130     case OPT_fold_unroll_loops:

1131       flag_old_unroll_loops = value;

1132       break;

1133

1134     case OPT_fomit_frame_pointer:

1135       flag_omit_frame_pointer = value;

1136       break;

1137

1138     case OPT_foptimize_register_move:

1139       flag_regmove = value;

1140       break;

1141

1142     case OPT_foptimize_sibling_calls:

1143       flag_optimize_sibling_calls = value;

1144       break;

1145

1146     case OPT_fpack_struct:

1147       flag_pack_struct = value;

1148       break;

1149

1150     case OPT_fpeel_loops:

1151       flag_peel_loops_set = true;

1152       flag_peel_loops = value;

1153       break;

1154

1155     case OPT_fpcc_struct_return:

1156       flag_pcc_struct_return = value;

1157       break;

1158

1159     case OPT_fpeephole:

1160       flag_no_peephole = !value;

1161       break;

1162

1163     case OPT_fpeephole2:

1164       flag_peephole2 = value;

1165       break;

1166

1167     case OPT_fpic:

1168       flag_pic = value;

1169       break;

1170

1171     case OPT_fpie:

1172       flag_pie = value;

1173       break;

1174

1175     case OPT_fprefetch_loop_arrays:

1176       flag_prefetch_loop_arrays = value;

1177       break;

 

Above, at lines 1067 and 1068, switches -finline-limit and -finline-limit= overwrite the parameters "max-inline-insns-single", "max-inline-insns-auto", and "max-inline-insns-rtl": the first two are set to half of the given value, and the last one to the value itself (refer to Table 9: target_switches for i386 system).

flag_instrument_function_entry_exit (-finstrument-functions) if nonzero, inserts code that will call a function at the entry and exit point of each function. The prototypes of the function calls are as follow:

void __cyg_profile_func_enter(void *this_fn,void *call_site);

void __cyg_profile_func_exit(void *this_fn,void *call_site);

The this_fn argument is the address of the function being called, which can be identified by symbol table information. The call_site argument identifies the caller. (On some platforms the call_site information is not available.) If a function is expanded inline, the function calls are inserted before and after the inline code. For purposes of identification there must be a non-inline version of the function available, even if all the calls to it generate inline code.

To prevent a function from having the code inserted, it can be declared with the attribute no_instrument_function. This may be necessary for interrupt handlers and functions from which the profiling routines cannot be called.

flag_keep_inline_functions (-fkeep-inline-functions) if nonzero, the compiler will generate a body for a function even if all of the references to it are expanded inline and there are actually no calls to it. The default is -fno-keep-inline-functions, which does not create bodies for functions that are not called.

flag_keep_static_consts (-fkeep-static-consts) if nonzero, constant values that are private to the compilation unit are allocated storage even if they are not referenced. To prevent allocating space for unused constants, use -fno-keep-static-consts.

flag_leading_underscore (-fleading-underscore) if nonzero, forces each symbol written to the object file to be modified to begin with an underscore character. The option -fno-leading-underscore will suppress the addition of an underscore character. This option is available for use when attempting to link with legacy assembly code.

mem_report (-fmem-report) if nonzero, when the compiler finishes, it prints a detailed listing of the amount of storage that has been allocated for each data type and other permanent memory allocation information.

flag_move_all_movables (-fmove-all-movables) if nonzero, all invariant expressions are moved outside the loop. Whether this produces better or worse code depends on the structure of the loops in the source code. The default is -fno-move-all-movables except for Fortran.

flag_new_regalloc (-fnew-ra) if nonzero, uses the graph coloring register allocator.

flag_non_call_exceptions (-fnon-call-exceptions) if nonzero, generates code that makes it possible for trapping instructions (such as invalid floating point operations or invalid memory addressing) to throw exceptions. This option is not universally available because it requires platform-specific runtime support. This is limited to hardware trap signals only and does not include general signals such as SIGALRM or SIGTERM.

flag_old_unroll_all_loops (-fold-unroll-all-loops) if nonzero, enables loop unrolling in unroll.c. All loops are unrolled. This is generally not a win.

flag_old_unroll_loops (-fold-unroll-loops) if nonzero, enables loop unrolling in unroll.c. Only loops for which the number of iterations can be calculated at compile-time (UNROLL_COMPLETELY, UNROLL_MODULO) or at run-time (preconditioned to be UNROLL_MODULO) are unrolled.

flag_optimize_sibling_calls (-foptimize-sibling-calls) if nonzero, optimizes recursive tail calls and sibling calls. This flag is automatically set by -O2, -O3, and -Os. The default is -fno-optimize-sibling-calls. The following is an example of a recursive tail call:

int rewhim(int x,int y) {

. . .

return(rewhim(x+1,y));

}

Optimization can be performed by, instead of making a new function call, inserting a command that jumps to the top of the function. A similar situation is shown in the following example of a sibling call:

int whim(int x,int y) {

. . .

return(wham(x+1,y));

}

In a sibling call, the call to the function wham() must be made, but the stack frame of whim() can be deleted by the call, causing wham() to return its value directly to the caller of whim().

flag_pack_struct (-fpack-struct) if nonzero, packs the members of structures together in such a way that no alignment space is inserted between the members of the structure. This could cause the executable code accessing structure members to be less efficient, and it could also cause the code to be incompatible with the system libraries.

flag_peel_loops (-fpeel-loops) if nonzero, enables loop peeling.

flag_no_peephole (-fno-peephole) if nonzero, disables any machine-specific peephole optimizations.

flag_prefetch_loop_arrays (-fprefetch-loop-arrays) if nonzero, enables prefetch optimizations for arrays in loops.

你可能感兴趣的:(Studying note of GCC-3.4.6 source (26 cont1))