GCC提供了一个称为内建函数的设施。它们的行为与我们编程时用到的库函数类似。事实上你可以在你的程序中像使用库函数那样使用这些内建函数,你甚至不需要包含任何头文件。
显然,这些函数不会是我们从头文件中看到的那种函数声明的形式。它们不能出现在任何头文件中。这些函数在GCC内部得到处理,它们必须是rtx的形式。某些函数只是表明了期望的操作,在展开后它们将被代码片段所替代(例如著名的、在linux代码中常见的__builtin_constant_p);而某些内建函数则需要调用在libiberty——GCC所使用的库——中定义的函数。作为系统启动的一步,内建函数的内部表示形式需要被创建,并加入合适的名字空间——这由c_common_nodes_and_builtins完成。
cxx_init_decl_processing (continue)
3000 c_common_nodes_and_builtins ();
c_common_nodes_and_builtins首先创建内建类型。作为一种通用的方式,GCC倾向于使用文件来描述细节。这里对于这些内建类型,使用了文件“builtin-types.def”,这个文件提供了一种在声明内建函数时描述其所使用类型的方式。在包含这个文件之前,需要定义以下的宏:
DEF_PRIMITIVE_TYPE (ENUM, TYPE)
ENUM是标识符,表示哪个类型将被定义。TYPE是代表类型的表达式树。
DEF_FUNCTION_TYPE_0 (ENUM, RETURN)
DEF_FUNCTION_TYPE_1 (ENUM, RETURN, ARG1)
DEF_FUNCTION_TYPE_2 (ENUM, RETURN, ARG1, ARG2)
DEF_FUNCTION_TYPE_3 (ENUM, RETURN, ARG1, ARG2, ARG3)
DEF_FUNCTION_TYPE_4 (ENUM, RETURN, ARG1, ARG2, ARG3, ARG4)
这些宏描述函数类型。ENUM含义与上同。RETURN类型是已经定义的一个枚举值。ARG1、ARG2、ARG3及ARG4类似地给出了参数的类型。
DEF_FUNCTION_TYPE_VAR_0 (ENUM, RETURN)
DEF_FUNCTION_TYPE_VAR_1 (ENUM, RETURN, ARG1)
DEF_FUNCTION_TYPE_VAR_2 (ENUM, RETURN, ARG1, ARG2)
DEF_FUNCTION_TYPE_VAR_3 (ENUM, RETURN, ARG1, ARG2, ARG3)
与上面的宏类似,但用于描述接受可变参数的函数类型。例如:
DEF_FUNCTION_TYPE_1 (BT_INT_DOUBLE, BT_INT, BT_DOUBLE)
使用枚举值BT_INT_DOUBLE描述了类型`int ()(double)',而:
DEF_FUNCTION_TYPE_VAR_1 (BT_INT_DOUBLE_VAR, BT_INT, BT_DOUBLE)
描述了类型`int ()(double, ...)'。
DEF_POINTER_TYPE (ENUM, TYPE)
这个宏描述了一个指针类型。ENUM含义同上,TYPE是所指向的类型。
3057 void
3058 c_common_nodes_and_builtins (void) in c-common.c
3059 {
3060 enum builtin_type
3061 {
3062 #define DEF_PRIMITIVE_TYPE(NAME, VALUE) NAME,
3063 #define DEF_FUNCTION_TYPE_0(NAME, RETURN) NAME,
3064 #define DEF_FUNCTION_TYPE_1(NAME, RETURN, ARG1) NAME,
3065 #define DEF_FUNCTION_TYPE_2(NAME, RETURN, ARG1, ARG2) NAME,
3066 #define DEF_FUNCTION_TYPE_3(NAME, RETURN, ARG1, ARG2, ARG3) NAME,
3067 #define DEF_FUNCTION_TYPE_4(NAME, RETURN, ARG1, ARG2, ARG3, ARG4) NAME,
3068 #define DEF_FUNCTION_TYPE_VAR_0(NAME, RETURN) NAME,
3069 #define DEF_FUNCTION_TYPE_VAR_1(NAME, RETURN, ARG1) NAME,
3070 #define DEF_FUNCTION_TYPE_VAR_2(NAME, RETURN, ARG1, ARG2) NAME,
3071 #define DEF_FUNCTION_TYPE_VAR_3(NAME, RETURN, ARG1, ARG2, ARG3) NAME,
3072 #define DEF_POINTER_TYPE(NAME, TYPE) NAME,
3073 #include "builtin-types.def"
3074 #undef DEF_PRIMITIVE_TYPE
3075 #undef DEF_FUNCTION_TYPE_0
3076 #undef DEF_FUNCTION_TYPE_1
3077 #undef DEF_FUNCTION_TYPE_2
3078 #undef DEF_FUNCTION_TYPE_3
3079 #undef DEF_FUNCTION_TYPE_4
3080 #undef DEF_FUNCTION_TYPE_VAR_0
3081 #undef DEF_FUNCTION_TYPE_VAR_1
3082 #undef DEF_FUNCTION_TYPE_VAR_2
3083 #undef DEF_FUNCTION_TYPE_VAR_3
3084 #undef DEF_POINTER_TYPE
3085 BT_LAST
3086 };
3087
3088 typedef enum builtin_type builtin_type;
3089
3090 tree builtin_types[(int) BT_LAST];
3091 int wchar_type_size;
3092 tree array_domain_type;
3093 tree va_list_ref_type_node;
3094 tree va_list_arg_type_node;
3095
3096 /* Define `int' and `char' first so that dbx will output them first. */
3097 record_builtin_type (RID_INT, NULL, integer_type_node);
3098 record_builtin_type (RID_CHAR, "char", char_type_node);
3099
3100 /* `signed' is the same as `int'. FIXME: the declarations of "signed",
3101 "unsigned long", "long long unsigned" and "unsigned short" were in C++
3102 but not C. Are the conditionals here needed? */
3103 if (c_dialect_cxx ())
3104 record_builtin_type (RID_SIGNED, NULL, integer_type_node);
3105 record_builtin_type (RID_LONG, "long int", long_integer_type_node);
3106 record_builtin_type (RID_UNSIGNED, "unsigned int", unsigned_type_node);
3107 record_builtin_type (RID_MAX, "long unsigned int",
3108 long_unsigned_type_node);
3109 if (c_dialect_cxx ())
3110 record_builtin_type (RID_MAX, "unsigned long", long_unsigned_type_node);
3111 record_builtin_type (RID_MAX, "long long int",
3112 long_long_integer_type_node);
3113 record_builtin_type (RID_MAX, "long long unsigned int",
3114 long_long_unsigned_type_node);
3115 if (c_dialect_cxx ())
3116 record_builtin_type (RID_MAX, "long long unsigned",
3117 long_long_unsigned_type_node);
3118 record_builtin_type (RID_SHORT, "short int", short_integer_type_node);
3119 record_builtin_type (RID_MAX, "short unsigned int",
3120 short_unsigned_type_node);
3121 if (c_dialect_cxx ())
3122 record_builtin_type (RID_MAX, "unsigned short",
3123 short_unsigned_type_node);
3124
3125 /* Define both `signed char' and `unsigned char'. */
3126 record_builtin_type (RID_MAX, "signed char", signed_char_type_node);
3127 record_builtin_type (RID_MAX, "unsigned char", unsigned_char_type_node);
函数record_builtin_type将内建类型的声明加入到全局名字空间中。其参数rid_index是内建类型在数组ridpointers中的索引,name是用于查找内建类型的名字,type是内建类型的*_TYPE节点。注意到,type节点是在build_common_tree_nodes中创建的。
2729 void
2730 record_builtin_type (enum rid rid_index, in decl.c
2731 const char* name,
2732 tree type)
2733 {
2734 tree rname = NULL_TREE, tname = NULL_TREE;
2735 tree tdecl = NULL_TREE;
2736
2737 if ((int) rid_index < (int) RID_MAX)
2738 rname = ridpointers[(int) rid_index];
2739 if (name)
2740 tname = get_identifier (name);
2741
2742 /* The calls to SET_IDENTIFIER_GLOBAL_VALUE below should be
2743 eliminated. Built-in types should not be looked up name; their
2744 names are keywords that the parser can recognize. However, there
2745 is code in c-common.c that uses identifier_global_value to look
2746 up built-in types by name. */
2747 if (tname)
2748 {
2749 tdecl = build_decl (TYPE_DECL, tname, type);
2750 DECL_ARTIFICIAL (tdecl) = 1;
2751 SET_IDENTIFIER_GLOBAL_VALUE (tname, tdecl);
2752 }
2753 if (rname)
2754 {
2755 if (!tdecl)
2756 {
2757 tdecl = build_decl (TYPE_DECL, rname, type);
2758 DECL_ARTIFICIAL (tdecl) = 1;
2759 }
2760 SET_IDENTIFIER_GLOBAL_VALUE (rname, tdecl);
2761 }
2762
2763 if (!TYPE_NAME (type))
2764 TYPE_NAME (type) = tdecl;
2765
2766 if (tdecl)
2767 {
2768 TREE_CHAIN (tdecl) = builtin_type_decls;
2769 builtin_type_decls = tdecl;
2770 }
2771 }
若仔细查看reswords,会有趣地发现,例如:在上面的3105行,RID_LONG对应的关键字是“long”,但在这里我们期望的名字是“long int”——这是给程序员用的完整类型名;而当关键字本身就是完整的类型名时(例如3097行的RID_INT),传给name参数的是NULL,此时由rid_index所对应的关键字充当类型的名字。
在上面的代码中,显然如果参数name不是NULL,则用它作为类型的名字,其优先级高于rid_index所对应的保留字。
262 #define SET_IDENTIFIER_GLOBAL_VALUE(NODE, VAL) \ in cp-tree.h
263 set_namespace_binding ((NODE), global_namespace, (VAL))
SET_IDENTIFIER_GLOBAL_VALUE通过set_namespace_binding,在全局名字空间global_namespace中把标识符NODE绑定到VAL。结果,我们将得到下面的图(仅显示RID_LONG)。
图35:内建类型的节点
注意到由set_namespace_binding加入的节点期望能通过get_identifier来访问。而沿着链namespace_bindings及bindings field,所有定义了该标识符的域都可被找出。
在前面看到,对于int,short这样的整型,在不同的平台上有不同的大小,为此编译器还维护着一组表示标准大小的整型节点。现在也需要将它们加入全局名字空间,使其堪用。
c_common_nodes_and_builtins (continue)
3129 /* These are types that c_common_type_for_size and
3130 c_common_type_for_mode use. */
3131 (*lang_hooks.decls.pushdecl) (build_decl (TYPE_DECL, NULL_TREE,
3132 intQI_type_node));
3133 (*lang_hooks.decls.pushdecl) (build_decl (TYPE_DECL, NULL_TREE,
3134 intHI_type_node));
3135 (*lang_hooks.decls.pushdecl) (build_decl (TYPE_DECL, NULL_TREE,
3136 intSI_type_node));
3137 (*lang_hooks.decls.pushdecl) (build_decl (TYPE_DECL, NULL_TREE,
3138 intDI_type_node));
3139 #if HOST_BITS_PER_WIDE_INT >= 64
3140 (*lang_hooks.decls.pushdecl) (build_decl (TYPE_DECL,
3141 get_identifier ("__int128_t"),
3142 intTI_type_node));
3143 #endif
3144 (*lang_hooks.decls.pushdecl) (build_decl (TYPE_DECL, NULL_TREE,
3145 unsigned_intQI_type_node));
3146 (*lang_hooks.decls.pushdecl) (build_decl (TYPE_DECL, NULL_TREE,
3147 unsigned_intHI_type_node));
3148 (*lang_hooks.decls.pushdecl) (build_decl (TYPE_DECL, NULL_TREE,
3149 unsigned_intSI_type_node));
3150 (*lang_hooks.decls.pushdecl) (build_decl (TYPE_DECL, NULL_TREE,
3151 unsigned_intDI_type_node));
3152 #if HOST_BITS_PER_WIDE_INT >= 64
3153 (*lang_hooks.decls.pushdecl) (build_decl (TYPE_DECL,
3154 get_identifier ("__uint128_t"),
3155 unsigned_intTI_type_node));
3156 #endif
int*_type_node是表示整型类型大小的节点,它们也是在函数build_common_tree_nodes中创建的,并且是上一节中不超过32比特的*_type节点的别名。对于这些节点(__int128_t与__uint128_t除外,它们带有标识符),传给build_decl的第二个参数是NULL,因为这些节点不对应于任何标识符(即不能由用户访问)。这个参数将被设置入所创建的TYPE_DECL节点的name域。lang_hooks的钩子pushdecl,对于C++仍是函数pushdecl。
566 tree
567 pushdecl (tree x) in name-lookup.c
568 {
569 tree t;
570 tree name;
571 int need_new_binding;
572
573 timevar_push (TV_NAME_LOOKUP);
574
575 need_new_binding = 1;
…
604 name = DECL_NAME (x);
605 if (name)
606 {
...
1007 }
1008
1009 if (need_new_binding)
1010 add_decl_to_level (x,
1011 DECL_NAMESPACE_SCOPE_P (x)
1012 ? NAMESPACE_LEVEL (CP_DECL_CONTEXT (x))
1013 : current_binding_level);
1014
1015 POP_TIMEVAR_AND_RETURN (TV_NAME_LOOKUP, x);
1016 }
对于这些节点,在604行提取的name是NULL,因此605行的if块被跳过。在1013行选用的是current_binding_level(此时它就是全局名字空间所对应的层),随后由add_decl_to_level将这些节点加入全局名字空间。
在执行c_common_nodes_and_builtins的3152行之前,简化的布局显示于下图(显示与intTI_type_node相关的节点,及链接在names域的第一个节点)。
图36:内建类型的节点2
而在上面宏HOST_BITS_PER_WIDE_INT有以下的定义:
47 #if HOST_BITS_PER_LONG >= 64 || !defined NEED_64BIT_HOST_WIDE_INT in hwint.h
48 # define HOST_BITS_PER_WIDE_INT HOST_BITS_PER_LONG
49 # define HOST_WIDE_INT long
50 #else
51 # if HOST_BITS_PER_LONGLONG >= 64
52 # define HOST_BITS_PER_WIDE_INT HOST_BITS_PER_LONGLONG
53 # define HOST_WIDE_INT long long
54 # else
55 # if HOST_BITS_PER___INT64 >= 64
56 # define HOST_BITS_PER_WIDE_INT HOST_BITS_PER___INT64
57 # define HOST_WIDE_INT __int64
58 # else
59 #error "Unable to find a suitable type for HOST_WIDE_INT"
60 # endif
61 # endif
62 #endif
对于32位系统,NEED_64BIT_HOST_WIDE_INT是没有定义的。因此HOST_WIDE_INT就是long,而HOST_BITS_PER_WIDE_INT是32。由此也可看到,对于64位系统,GCC定义了__int128_t及__uint128_t给程序员。
c_common_nodes_and_builtins (continue)
3158 /* Create the widest literal types. */
3159 widest_integer_literal_type_node
3160 = make_signed_type (HOST_BITS_PER_WIDE_INT * 2);
3161 (*lang_hooks.decls.pushdecl) (build_decl (TYPE_DECL, NULL_TREE,
3162 widest_integer_literal_type_node));
3163
3164 widest_unsigned_literal_type_node
3165 = make_unsigned_type (HOST_BITS_PER_WIDE_INT * 2);
3166 (*lang_hooks.decls.pushdecl) (build_decl (TYPE_DECL, NULL_TREE,
3167 widest_unsigned_literal_type_node));
3168
3169 /* `unsigned long' is the standard type for sizeof.
3170 Note that stddef.h uses `unsigned long',
3171 and this must agree, even if long and int are the same size. */
3172 size_type_node =
3173 TREE_TYPE (identifier_global_value (get_identifier (SIZE_TYPE)));
3174 signed_size_type_node = c_common_signed_type (size_type_node);
3175 set_sizetype (size_type_node);
对于32位系统,widest_integer_literal_type_node及widest_unsigned_literal_type_node对应着64位的有符号/无符号整型,它们亦不开放给程序员。3172行的size_type_node是sizeof操作符的类型,对于x86/Linux,它是“unsigned int”。