common_handle_option (continue)
909 case OPT_fcall_used_:
910 fix_register (arg, 0, 1);
911 break;
912
913 case OPT_fcall_saved_:
914 fix_register (arg, 0, 0);
915 break;
916
917 case OPT_fcaller_saves:
918 flag_caller_saves = value;
919 break;
920
921 case OPT_fcommon:
922 flag_no_common = !value;
923 break;
924
925 case OPT_fcprop_registers:
926 flag_cprop_registers = value;
927 break;
928
929 case OPT_fcrossjumping:
930 flag_crossjumping = value;
931 break;
932
933 case OPT_fcse_follow_jumps:
934 flag_cse_follow_jumps = value;
935 break;
936
937 case OPT_fcse_skip_blocks:
938 flag_cse_skip_blocks = value;
939 break;
940
941 case OPT_fdata_sections:
942 flag_data_sections = value;
943 break;
944
945 case OPT_fdefer_pop:
946 flag_defer_pop = value;
947 break;
948
949 case OPT_fdelayed_branch:
950 flag_delayed_branch = value;
951 break;
952
953 case OPT_fdelete_null_pointer_checks:
954 flag_delete_null_pointer_checks = value;
955 break;
Switch –fcall-used-`register` treats the named register as a register that is available for allocation, but will have its contents destroyed by a function call. It may be allocated for temporary storage, but must be reloaded after a function call. This option must not be specified for registers that have fixed roles (such as a frame pointer or stack pointer). The register names are platform dependent and are named in the REGISTER_NAMES macro of the machine description.
And switch -fcall-saved-`register` treats the named register as a register that can be allocated to contain a value, and that value will be retained even through a function call. Functions compiled with this option set must save and restore the contents of the register. This option must not be specified for registers that have fixed roles (such as a frame pointer or stack pointer). The register names are platform dependent and are named in the REGISTER_NAMES macro of the machine description.
These switches are handled by fix_register. Notice the call_used parameter passed to the function.
720 void
721 fix_register (const char *name, int fixed, int call_used) in regclass.c
722 {
723 int i;
724
725 /* Decode the name and update the primary form of
726 the register info. */
727
728 if ((i = decode_reg_name (name)) >= 0)
729 {
730 if ((i == STACK_POINTER_REGNUM
731 #ifdef HARD_FRAME_POINTER_REGNUM
732 || i == HARD_FRAME_POINTER_REGNUM
733 #else
734 || i == FRAME_POINTER_REGNUM
735 #endif
736 )
737 && (fixed == 0 || call_used == 0))
738 {
739 static const char * const what_option[2][2] = {
740 { "call-saved", "call-used" },
741 { "no-such-option", "fixed" }};
742
743 error ("can't use '%s' as a %s register", name,
744 what_option[fixed][call_used]);
745 }
746 else
747 {
748 fixed_regs[i] = fixed;
749 call_used_regs[i] = call_used;
750 #ifdef CALL_REALLY_USED_REGISTERS
751 if (fixed == 0)
752 call_really_used_regs[i] = call_used;
753 #endif
754 }
755 }
756 else
757 {
758 warning ("unknown register name: %s", name);
759 }
760 }
decode_reg_name is used to translate the register’s name into its hard register number. Some assembly languages, like the one used in Linux, use a ‘%’ prefix when referring to registers; at line 681 below, strip_reg_name gets rid of this prefix. Notice that we can use either a decimal number or a literal name to select the register. A literal name is matched against reg_names, which contains the names from REGISTER_NAMES.
673 int
674 decode_reg_name (const char *asmspec) in varasm.c
675 {
676 if (asmspec != 0)
677 {
678 int i;
679
680 /* Get rid of confusing prefixes. */
681 asmspec = strip_reg_name (asmspec);
682
683 /* Allow a decimal number as a "register name". */
684 for (i = strlen (asmspec) - 1; i >= 0; i--)
685 if (! ISDIGIT (asmspec[i]))
686 break;
687 if (asmspec[0] != 0 && i < 0)
688 {
689 i = atoi (asmspec);
690 if (i < FIRST_PSEUDO_REGISTER && i >= 0)
691 return i;
692 else
693 return -2;
694 }
695
696 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
697 if (reg_names[i][0]
698 && ! strcmp (asmspec, strip_reg_name (reg_names [i])))
699 return i;
700
701 #ifdef ADDITIONAL_REGISTER_NAMES
702 {
703 static const struct { const char *const name; const int number; } table[]
704 = ADDITIONAL_REGISTER_NAMES;
705
706 for (i = 0; i < (int) ARRAY_SIZE (table); i++)
707 if (! strcmp (asmspec, table[i].name))
708 return table[i].number;
709 }
710 #endif /* ADDITIONAL_REGISTER_NAMES */
711
712 if (!strcmp (asmspec, "memory"))
713 return -4;
714
715 if (!strcmp (asmspec, "cc"))
716 return -3;
717
718 return -2;
719 }
720
721 return -1;
722 }
For the x86 machine, REGISTER_NAMES defines the names of the basic register set. These registers can also be selected by the compiler's hard register number (a decimal number).
2746 #define HI_REGISTER_NAMES / in i386.h
2747 {"ax","dx","cx","bx","si","di","bp","sp", /
2748 "st","st(1)","st(2)","st(3)","st(4)","st(5)","st(6)","st(7)", /
2749 "argp", "flags", "fpsr", "dirflag", "frame", /
2750 "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7", /
2751 "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" , /
2752 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", /
2753 "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"}
2754
2755 #define REGISTER_NAMES HI_REGISTER_NAMES
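Furthermore, for the x86 machine, ADDITIONAL_REGISTER_NAMES defines additional register names for the platform; however, these names are not part of reg_names, so they cannot be found by indexing with the compiler's hard register number. Instead, decode_reg_name looks them up in a separate table that maps each name to a register number.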
2759 #define ADDITIONAL_REGISTER_NAMES / in i386.h
2760 { { "eax", 0 }, { "edx", 1 }, { "ecx", 2 }, { "ebx", 3 }, /
2761 { "esi", 4 }, { "edi", 5 }, { "ebp", 6 }, { "esp", 7 }, /
2762 { "rax", 0 }, { "rdx", 1 }, { "rcx", 2 }, { "rbx", 3 }, /
2763 { "rsi", 4 }, { "rdi", 5 }, { "rbp", 6 }, { "rsp", 7 }, /
2764 { "al", 0 }, { "dl", 1 }, { "cl", 2 }, { "bl", 3 }, /
2765 { "ah", 0 }, { "dh", 1 }, { "ch", 2 }, { "bh", 3 }, /
2766 { "mm0", 8}, { "mm1", 9}, { "mm2", 10}, { "mm3", 11}, /
2767 { "mm4", 12}, { "mm5", 13}, { "mm6", 14}, { "mm7", 15} }
Then in fix_register, at line 748, fixed_regs is indexed by hard register number; an element contains 1 if the corresponding register has a fixed use. These are the registers that cannot be used to allocate a pseudo register for general use.
At line 749, call_used_regs is indexed by hard register number; an element contains 1 if the corresponding register has a fixed use or is clobbered by function calls. These are the registers that cannot be used to allocate a pseudo register whose life crosses calls unless we are able to save/restore them across the calls.
At line 750, CALL_REALLY_USED_REGISTERS is not defined on our target platform, so call_really_used_regs is not declared either.
Back in common_handle_option, at line 922, flag_no_common is set by -fno-common (and cleared by -fcommon). Specifying -fno-common will cause the compiler to make an explicit allocation of space in the data section for each global variable. The default is to allocate them in a common block that is resolved by the linker, so declaring the same global variable more than once causes the linker to resolve the two into one. You can specify -fno-common to verify that the program will compile and link on another system that does not use GCC.
And at line 942, flag_data_sections (-fdata-sections), if nonzero, each data item is placed into its own named section in the assembly language output. The section name is derived from the name of the data item. This only has an advantage on machines with a linker that can use sectioning to optimize allocation of space. For the same optimization with the executable code, see -ffunction-sections. Setting this option for a machine that does not support sectioning in its assembler code will result in a warning message, and the option will be ignored. Even on machines that support such sectioning there will be no advantage unless that linker uses the organization for optimization. In fact, it could have a detrimental effect by making the object code larger and slower to load.
If the -p option is set for profiling, this option (-fdata-sections) will have no effect. Also, because of the rearrangement of the code, you may have problems with the -g option and debugging.
And at line 950, flag_delayed_branch (-fdelayed-branch), if nonzero, this flag only has effect on machines with delayed branch slots. This has to do with loading and executing instructions at the same time the decision is being made whether to take a branch. After the decision is made the result of the instruction may be discarded depending on the location of the instruction and the decision made. This flag is set by every level of optimization if the target machine supports it, but it can be overridden by -fno-delayed-branch.
common_handle_option (continue)
957 case OPT_fdiagnostics_show_location_:
958 if (!strcmp (arg, "once"))
959 diagnostic_prefixing_rule (global_dc) = DIAGNOSTICS_SHOW_PREFIX_ONCE;
960 else if (!strcmp (arg, "every-line"))
961 diagnostic_prefixing_rule (global_dc)
962 = DIAGNOSTICS_SHOW_PREFIX_EVERY_LINE;
963 else
964 return 0;
965 break;
966
967 case OPT_fdump_unnumbered:
968 flag_dump_unnumbered = value;
969 break;
970
971 case OPT_feliminate_dwarf2_dups:
972 flag_eliminate_dwarf2_dups = value;
973 break;
974
975 case OPT_feliminate_unused_debug_types:
976 flag_eliminate_unused_debug_types = value;
977 break;
978
979 case OPT_feliminate_unused_debug_symbols:
980 flag_debug_only_used_symbols = value;
981 break;
982
983 case OPT_fexceptions:
984 flag_exceptions = value;
985 break;
986
987 case OPT_fexpensive_optimizations:
988 flag_expensive_optimizations = value;
989 break;
990
991 case OPT_ffast_math:
992 set_fast_math_flags (value);
993 break;
994
995 case OPT_ffinite_math_only:
996 flag_finite_math_only = value;
997 break;
998
999 case OPT_ffixed_:
1000 fix_register (arg, 1, 1);
1001 break;
1002
1003 case OPT_ffunction_cse:
1004 flag_no_function_cse = !value;
1005 break;
1006
1007 case OPT_ffloat_store:
1008 flag_float_store = value;
1009 break;
1010
1011 case OPT_fforce_addr:
1012 flag_force_addr = value;
1013 break;
1014
1015 case OPT_fforce_mem:
1016 flag_force_mem = value;
1017 break;
1018
1019 case OPT_ffunction_sections:
1020 flag_function_sections = value;
1021 break;
1022
1023 case OPT_fgcse:
1024 flag_gcse = value;
1025 break;
1026
1027 case OPT_fgcse_lm:
1028 flag_gcse_lm = value;
1029 break;
1030
1031 case OPT_fgcse_sm:
1032 flag_gcse_sm = value;
1033 break;
1034
1035 case OPT_fgcse_las:
1036 flag_gcse_las = value;
1037 break;
1038
1039 case OPT_fguess_branch_probability:
1040 flag_guess_branch_prob = value;
1041 break;
1042
1043 case OPT_fident:
1044 flag_no_ident = !value;
1045 break;
1046
1047 case OPT_fif_conversion:
1048 flag_if_conversion = value;
1049 break;
1050
1051 case OPT_fif_conversion2:
1052 flag_if_conversion2 = value;
1053 break;
1054
1055 case OPT_finhibit_size_directive:
1056 flag_inhibit_size_directive = value;
1057 break;
1058
1059 case OPT_finline:
1060 flag_no_inline = !value;
1061 break;
1062
1063 case OPT_finline_functions:
1064 flag_inline_functions = value;
1065 break;
This code snippet uses the following variables.
flag_dump_unnumbered (-fdump-unnumbered), when debugging the compiler with the -d option, this option will suppress instruction numbers and line numbers in the output files, which makes it easier to use diff to compare dumps.
flag_eliminate_dwarf2_dups (-feliminate-dwarf2-dups) if nonzero, doing dwarf2 duplicate elimination.
flag_eliminate_unused_debug_types (-feliminate-unused-debug-types) if nonzero, doing unused type elimination.
flag_debug_only_used_symbols (-feliminate-unused-debug-symbols) if nonzero, means emit debugging information only for symbols which are used.
flag_exceptions (-fexceptions) if nonzero, enables exception handling. This option generates the extra code necessary to process the throwing and catching of exceptions. If you do not specify this option, it will be specified automatically for languages such as Ada, Java, and C++ that normally throw exceptions.
flag_finite_math_only (-ffinite-math-only) if nonzero, means that no NaNs or +-Infs are expected.
flag_no_function_cse (-fno-function-cse) if nonzero, will cause each instruction making a call to implicitly include the address of the function. If zero (-ffunction-cse), function calls are made with the function address stored in a register, which produces more efficient code.
flag_float_store (-ffloat-store) if nonzero, doesn’t allocate registers to hold floating point values. On some machines this option may cause the registers to extend the precision beyond that defined for the language, and thus will carry more precision than the floating point data stored in memory. This flag is only useful if your program must restrict its precision to exactly that defined by the IEEE standard.
flag_force_addr (-fforce-addr) if nonzero, addresses must be copied into registers before arithmetic is performed on them. This improves the generated code because the addresses needed will often have been previously loaded into a register and do not need to be loaded again.
flag_function_sections (-ffunction-sections) if nonzero, each function is placed into its own named section in the assembly language output. The section name is derived from the function name. This only has an advantage on machines with a linker that can use sectioning to optimize allocation of space. For the same optimization with data, see -fdata-sections. Setting this option for a machine that does not support sectioning in its assembler code will result in a warning message, and the option will be ignored. Even on machines that support such sectioning there will be no advantage unless that linker uses the organization for optimization. In fact, it could have a detrimental effect by making the object code larger and slower to load.
If the -p option is set for profiling, this option will have no effect. Also, because of the rearrangement of the code, you may have problems with the -g option and debugging.
flag_gcse_lm (-fgcse-lm) if nonzero, performs global common subexpression elimination optimization by detecting load and store operations inside a loop, in which the load operation is in a form that can be moved in front of the loop and thus only occur once. This is the default, but will have no effect except when -Os is set. It can be overridden with -fno-gcse-lm.
flag_gcse_sm (-fgcse-sm) if nonzero, performs global common subexpression elimination optimization by detecting load and store operations inside a loop, in which the store operation is in a form that can be moved after the loop and thus only occur once. This is the default, but will have no effect except when -Os is set. It can be overridden with -fno-gcse-sm.
flag_gcse_las (-fgcse-las) if nonzero, the global common subexpression elimination pass eliminates redundant loads that come after stores to the same memory location (both partial and full redundancies).
flag_no_ident (-fno-ident) if nonzero, means ignore #ident directives. The #ident directive takes one argument, a string constant. On some systems, that string constant is copied into a special segment of the object file. On other systems, the directive is ignored.
flag_inhibit_size_directive (-finhibit-size-directive) if nonzero, doesn’t output a .size assembler directive, nor any other directive that could cause problems if the function is split and the two halves are separated in memory. This option is a special case used to compile crtstuff.c (part of GCC) and is not expected to have any other purpose.
For -ffast-math, certain mathematical calculations are made faster by violating some of the ISO and IEEE rules. For example, with this option set it is assumed that no negative values are passed to sqrt() and that all floating point values are valid. Setting this option causes the preprocessor macro __FAST_MATH__ to be defined.
This switch is handled by set_fast_math_flags.
1590 void
1591 set_fast_math_flags (int set) in opts.c
1592 {
1593 flag_trapping_math = !set;
1594 flag_unsafe_math_optimizations = set;
1595 flag_finite_math_only = set;
1596 flag_errno_math = !set;
1597 if (set)
1598 {
1599 flag_signaling_nans = 0;
1600 flag_rounding_math = 0;
1601 }
1602 }
In this function,
flag_trapping_math (-ftrapping-math) if zero, means that floating-point math operations cannot generate a (user-visible) trap. This is the case, for example, in nonstop IEEE 754 arithmetic. Trapping conditions include division by zero, overflow, underflow, invalid and inexact, but does not include operations on signaling NaNs.
flag_unsafe_math_optimizations (-funsafe-math-optimizations) if nonzero, means that unsafe floating-point math optimizations are allowed for the sake of speed. IEEE compliance is not guaranteed, and operations are allowed to assume that their arguments and results are "normal" (e.g., nonnegative for SQRT).
flag_errno_math, if nonzero, means the front end generally wants `errno' maintained by math operations, like built-in SQRT.
flag_signaling_nans (-fsignaling-nans) if nonzero, means disable transformations observable by signaling NaNs. This option implies that any operation on an IEEE signaling NaN can generate a (user-visible) trap.
flag_rounding_math (-frounding-math) if nonzero, means disable transformations that assume default floating point rounding behavior.
common_handle_option (continue)
1067 case OPT_finline_limit_:
1068 case OPT_finline_limit_eq:
1069 set_param_value ("max-inline-insns-single", value / 2);
1070 set_param_value ("max-inline-insns-auto", value / 2);
1071 set_param_value ("max-inline-insns-rtl", value);
1072 break;
1073
1074 case OPT_finstrument_functions:
1075 flag_instrument_function_entry_exit = value;
1076 break;
1077
1078 case OPT_fkeep_inline_functions:
1079 flag_keep_inline_functions =value;
1080 break;
1081
1082 case OPT_fkeep_static_consts:
1083 flag_keep_static_consts = value;
1084 break;
1085
1086 case OPT_fleading_underscore:
1087 flag_leading_underscore = value;
1088 break;
1089
1090 case OPT_floop_optimize:
1091 flag_loop_optimize = value;
1092 break;
1093
1094 case OPT_fmath_errno:
1095 flag_errno_math = value;
1096 break;
1097
1098 case OPT_fmem_report:
1099 mem_report = value;
1100 break;
1101
1102 case OPT_fmerge_all_constants:
1103 flag_merge_constants = value + value;
1104 break;
1105
1106 case OPT_fmerge_constants:
1107 flag_merge_constants = value;
1108 break;
1109
1110 case OPT_fmessage_length_:
1111 pp_set_line_maximum_length (global_dc->printer, value);
1112 break;
1113
1114 case OPT_fmove_all_movables:
1115 flag_move_all_movables = value;
1116 break;
1117
1118 case OPT_fnew_ra:
1119 flag_new_regalloc = value;
1120 break;
1121
1122 case OPT_fnon_call_exceptions:
1123 flag_non_call_exceptions = value;
1124 break;
1125
1126 case OPT_fold_unroll_all_loops:
1127 flag_old_unroll_all_loops = value;
1128 break;
1129
1130 case OPT_fold_unroll_loops:
1131 flag_old_unroll_loops = value;
1132 break;
1133
1134 case OPT_fomit_frame_pointer:
1135 flag_omit_frame_pointer = value;
1136 break;
1137
1138 case OPT_foptimize_register_move:
1139 flag_regmove = value;
1140 break;
1141
1142 case OPT_foptimize_sibling_calls:
1143 flag_optimize_sibling_calls = value;
1144 break;
1145
1146 case OPT_fpack_struct:
1147 flag_pack_struct = value;
1148 break;
1149
1150 case OPT_fpeel_loops:
1151 flag_peel_loops_set = true;
1152 flag_peel_loops = value;
1153 break;
1154
1155 case OPT_fpcc_struct_return:
1156 flag_pcc_struct_return = value;
1157 break;
1158
1159 case OPT_fpeephole:
1160 flag_no_peephole = !value;
1161 break;
1162
1163 case OPT_fpeephole2:
1164 flag_peephole2 = value;
1165 break;
1166
1167 case OPT_fpic:
1168 flag_pic = value;
1169 break;
1170
1171 case OPT_fpie:
1172 flag_pie = value;
1173 break;
1174
1175 case OPT_fprefetch_loop_arrays:
1176 flag_prefetch_loop_arrays = value;
1177 break;
Above, at line 1067 and 1068, switches -finline-limit and –finline-limit= overwrite "max-inline-insns-single", "max-inline-insns-auto", and "max-inline-insns-rtl" (refer to Table 9: target_switches for i386 system).
flag_instrument_function_entry_exit (-finstrument-functions) if nonzero, inserts code that will call a function at the entry and exit point of each function. The prototypes of the function calls are as follow:
void __cyg_profile_func_enter(void *this_fn,void *call_site);
void __cyg_profile_func_exit(void *this_fn,void *call_site);
The this_fn argument is the address of the function being called, which can be identified by symbol table information. The call_site argument identifies the caller. (On some platforms the call_site information is not available.) If a function is expanded inline, the function calls are inserted before and after the inline code. For purposes of identification there must be a non-inline version of the function available, even if all the calls to it generate inline code.
To prevent a function from having the code inserted, it can be declared with the attribute no_instrument_function. This may be necessary for interrupt handlers and functions from which the profiling routines cannot be called.
flag_keep_inline_functions (-fkeep-inline-functions) if nonzero, the compiler will generate a body for a function even if all of the references to it are expanded inline and there are actually no calls to it. The default is -fno-keep-inline-functions, which does not create bodies for functions that are not called.
flag_keep_static_consts (-fkeep-static-consts) if nonzero, constant values that are private to the compilation unit are allocated storage even if they are not referenced. To prevent allocating space for unused constants, use -fno-keep-static-consts.
flag_leading_underscore (-fleading-underscore) if nonzero, forces each symbol written to the object file to be modified to begin with an underscore character. The option -fno-leading-underscore will suppress the addition of an underscore character. This option is available for use when attempting to link with legacy assembly code.
mem_report (-fmem-report) if nonzero, when the compiler finishes, it prints a detailed listing of the amount of storage that has been allocated for each data type and other permanent memory allocation information.
flag_move_all_movables (-fmove-all-movables) if nonzero, all invariant expressions are moved outside the loop. Whether this produces better or worse code depends on the structure of the loops in the source code. The default is -fno-move-all-movables except for Fortran.
flag_new_regalloc (-fnew-ra) if nonzero, uses the graph coloring register allocator.
flag_non_call_exceptions (-fnon-call-exceptions) if nonzero, generates code that makes it possible for trapping instructions (such as invalid floating point operations or invalid memory addressing) to throw exceptions. This option is not universally available because it requires platform-specific runtime support. This is limited to hardware trap signals only and does not include general signals such as SIGALRM or SIGTERM.
flag_old_unroll_all_loops (-fold-unroll-all-loops) if nonzero, enables loop unrolling in unroll.c. All loops are unrolled. This is generally not a win.
flag_old_unroll_loops (-fold-unroll-loops) if nonzero, enables loop unrolling in unroll.c. Only loops for which the number of iterations can be calculated at compile-time (UNROLL_COMPLETELY, UNROLL_MODULO) or at run-time (preconditioned to be UNROLL_MODULO) are unrolled.
flag_optimize_sibling_calls (-foptimize-sibling-calls) if nonzero, optimizes recursive tail calls and sibling calls. This flag is automatically set by -O2, -O3, and -Os. The default is -fno-optimize-sibling-calls. The following is an example of a recursive tail call:
int rewhim(int x,int y) {
. . .
return(rewhim(x+1,y));
}
Optimization can be performed by, instead of making a new function call, inserting a command that jumps to the top of the function. A similar situation is shown in the following example of a sibling call:
int whim(int x,int y) {
. . .
return(wham(x+1,y));
}
In a sibling call, the call to the function wham() must be made, but the stack frame of whim() can be deleted by the call, causing wham() to return its value directly to the caller of whim().
flag_pack_struct (-fpack-struct) if nonzero, packs the members of structures together in such a way that no alignment space is inserted between the members of the structure. This could cause the executable code accessing structure members to be less efficient, and it could also cause the code to be incompatible with the system libraries.
flag_peel_loops (-fpeel-loops) if nonzero, enables loop peeling.
flag_no_peephole (-fno-peephole) if nonzero, disables any machine-specific peephole optimizations.
flag_prefetch_loop_arrays (-fprefetch-loop-arrays) if nonzero, enables prefetch optimizations for arrays in loops.