声明是C++最重要的一部分。正是函数声明、类型声明、名字空间声明、变量声明等, 构成了C++强大灵活的特性。C++编译器中这部分机制的初始化显得十分重要而复杂,这个机制还生成了语言运行时环境。
cxx_init (continue)
410 cxx_init_decl_processing ();
2942 void
2943 cxx_init_decl_processing (void) in decl.c
2944 {
2945 tree void_ftype;
2946 tree void_ftype_ptr;
2947
2948 /* Create all the identifiers we need. */
2949 initialize_predefined_identifiers ();
除了语言本身的保留字之外,编译器为了维护运行时环境还需要插入代码,而这些代码要用到一些由编译器预先定义、含义和用途都明确的标识符。毫无疑问,用户的源程序不能再定义同名的标识符,但可以小心地使用它们(不需要任何前向声明)。
2900 static void
2901 initialize_predefined_identifiers (void) in decl.c
2902 {
2903 const predefined_identifier *pid;
2904
2905 /* A table of identifiers to create at startup. */
2906 static const predefined_identifier predefined_identifiers[] = {
2907 { "C++", &lang_name_cplusplus, 0 },
2908 { "C", &lang_name_c, 0 },
2909 { "Java", &lang_name_java, 0 },
2910 { CTOR_NAME, &ctor_identifier, 1 },
2911 { "__base_ctor", &base_ctor_identifier, 1 },
2912 { "__comp_ctor", &complete_ctor_identifier, 1 },
2913 { DTOR_NAME, &dtor_identifier, 1 },
2914 { "__comp_dtor", &complete_dtor_identifier, 1 },
2915 { "__base_dtor", &base_dtor_identifier, 1 },
2916 { "__deleting_dtor", &deleting_dtor_identifier, 1 },
2917 { IN_CHARGE_NAME, &in_charge_identifier, 0 },
2918 { "nelts", &nelts_identifier, 0 },
2919 { THIS_NAME, &this_identifier, 0 },
2920 { VTABLE_DELTA_NAME, &delta_identifier, 0 },
2921 { VTABLE_PFN_NAME, &pfn_identifier, 0 },
2922 { "_vptr", &vptr_identifier, 0 },
2923 { "__vtt_parm", &vtt_parm_identifier, 0 },
2924 { "::", &global_scope_name, 0 },
2925 { "std", &std_identifier, 0 },
2926 { NULL, NULL, 0 }
2927 };
2928
2929 for (pid = predefined_identifiers; pid->name; ++pid)
2930 {
2931 *pid->node = get_identifier (pid->name);
2932 if (pid->ctor_or_dtor_p)
2933 IDENTIFIER_CTOR_OR_DTOR_P (*pid->node) = 1;
2934 }
2935 }
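每个预定义标识符都由下面的predefined_identifier类型来描述。这些预定义标识符不允许用户重定义;由于2906行的表格是只读的,该类型的各个域都被声明为const。注意node的类型是tree *const,const修饰的是指针本身(即存放IDENTIFIER_NODE的位置不可改变),而不是它所指向的tree,所以2931行仍然可以通过它写入标识符节点。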
2888 typedef struct predefined_identifier in decl.c
2889 {
2890 /* The name of the identifier. */
2891 const char *const name;
2892 /* The place where the IDENTIFIER_NODE should be stored. */
2893 tree *const node;
2894 /* Nonzero if this is the name of a constructor or destructor. */
2895 const int ctor_or_dtor_p;
2896 } predefined_identifier;
在2906行的predefined_identifiers定义中,赋给predefined_identifier中的node的值来自cp_global_trees——每个节点在整个系统中都是唯一的。2933行的宏将节点标记为对应于构造/析构函数。
436 #define IDENTIFIER_CTOR_OR_DTOR_P(NODE) \ in cp-tree.h
437 TREE_LANG_FLAG_3 (NODE)
实际上设置的是tree_common中lang_flag_3。
在C++中,在默认情况下,我们都是在全局名字空间下编写代码。这个名字空间,不需要我们特别声明,并在我们把代码输入编译器前,编译器会自动创建就绪。它的初始化就由cxx_init_decl_processing接下来完成。
cxx_init_decl_processing (continue)
2951 /* Fill in back-end hooks. */
2952 lang_missing_noreturn_ok_p = &cp_missing_noreturn_ok_p;
2953
2954 /* Create the global variables. */
2955 push_to_top_level ();
2956
2957 current_function_decl = NULL_TREE;
2958 current_binding_level = NULL;
2959 /* Enter the global namespace. */
2960 my_friendly_assert (global_namespace == NULL_TREE, 375);
2961 global_namespace = build_lang_decl (NAMESPACE_DECL, global_scope_name,
2962 void_type_node);
2963 begin_scope (sk_namespace, global_namespace);
2964
2965 current_lang_name = NULL_TREE;
在C++中,为了应对变量名冲突、变量命名空间污染问题,引入了名字空间。随之而来,变量作用域的规则要复杂得多。例如:
1 namespace A {
2 int f;
3 namespace B {
4 struct Z {
5 static float f;
6 void method () { f... }; // use Z::f
7 };
8 float Z::f = 0.0f;
9 extern "C" int ci; // push into global namespace
10 void gFunc() { float f; ... } // use local f
11 void gFunc(int) { f.... } // use A::f
12 void gFunc(float) { Z::f... } // use Z::f
13 }
14 }
名字同为f的变量,同时被声明在名字空间、类及函数的定义中。它们可以共存,是因为根据C++的语法,在上述每处引用到f的地方,都只有其中一个定义可见。定义在函数定义中的f,被称为局部变量,因为这个变量的作用域及可见性仅限于该函数体。但定义在名字空间及类定义中的变量则不然,在名字空间和类定义外,通过合适的表达式,它们仍可能被访问。因此,函数体与名字空间及类定义体是相异的命名空间;而名字空间与类定义则是相类的命名空间。又因为定义在函数体内的变量,在函数体外不可见、不可用,编译器无需在函数体外维护其信息(保存于函数定义的数据结构中就足够了)。而定义于名字空间及类中的变量,编译器则需要精心维护其信息,以期在每一引用处,利用这些信息,根据语法规则,确定出有效的定义或给出恰当的错误信息。用来维护这些信息的数据结构就是下面的saved_scope。
688 struct saved_scope GTY(()) in cp-tree.h
689 {
690 cxx_saved_binding *old_bindings;
691 tree old_namespace;
692 tree decl_ns_list;
693 tree class_name;
694 tree class_type;
695 tree access_specifier;
696 tree function_decl;
697 varray_type lang_base;
698 tree lang_name;
699 tree template_parms;
700 tree x_previous_class_type;
701 tree x_previous_class_values;
702 tree x_saved_tree;
703
704 HOST_WIDE_INT x_processing_template_decl;
705 int x_processing_specialization;
706 bool x_processing_explicit_instantiation;
707 int need_pop_function_context;
708
709 struct stmt_tree_s x_stmt_tree;
710
711 struct cp_binding_level *class_bindings;
712 struct cp_binding_level *bindings;
713
714 struct saved_scope *prev;
715 };
回到上面的例子,注意第9行,“extern "C" int ci;”要求编译器把“ci”加入全局名字空间,为此编译器需要临时回到全局名字空间。而在第10行,编译器又必须恢复原来的环境。因此,在需要编译器临时切换运行时环境的情况下,最好能缓存这个环境,并在切换回来时,根据缓存内容恢复。上面690行的cxx_saved_binding就用于这个目的。这是一个链表,所缓存环境中的声明依一定的顺序链接在一起。
4721 struct cxx_saved_binding GTY(()) in name-lookup.c
4722 {
4723 /* Link that chains saved C++ bindings for a given name into a stack. */
4724 cxx_saved_binding *previous;
4725 /* The name of the current binding. */
4726 tree identifier;
4727 /* The binding we're saving. */
4728 cxx_binding *binding;
4729 tree class_value;
4730 tree real_type_value;
4731 };
在这个结构中,previous域指向前一个被保存的声明,以此类推,所有被缓存的声明就依一定的次序链接在一起。另外,相同名字的声明可以出现在程序的多个地方,只要在引用的每一处,编译器根据语法规则能确定唯一的目标。4728行的binding域指向一个将声明与其作用域绑定在一起的数据结构,其定义如下:
73 struct cxx_binding GTY(()) in name-lookup.h
74 {
75 /* Link to chain together various bindings for this name. */
76 cxx_binding *previous;
77 /* The non-type entity this name is bound to. */
78 tree value;
79 /* The type entity this name is bound to. */
80 tree type;
81 /* The scope at which this binding was made. */
82 cxx_scope *scope;
83 unsigned value_is_inherited : 1;
84 unsigned is_local : 1;
85 };
76行的previous域指向的就是另一个同名声明的绑定结构,所有同名的声明通过彼此cxx_binding的previous链在一起。这里的声明可能是非类型声明,也可能是类型声明,78行的value和80行的type分别对应之。另外,还需要能从这个结构访问到相应作用域的数据结构,因此82行的scope指向下面类型的作用域数据结构实例。
63 typedef struct cp_binding_level cxx_scope; in name-lookup.h
这个结构除了被用于名字空间及类的作用域外,还被用于C++概念中的其他作用域。例如:函数(被视为与{}块等价)、try/catch块、函数参数、for初始化值、模板参数、特化模板的参数,这些作用域类型信息保存在214行的kind域中。而200行的level_chain则指向上一级包含该作用域的作用域(注意,该作用域也是有效的)。
144 struct cp_binding_level GTY(()) in name-lookup.h
145 {
146 /* A chain of _DECL nodes for all variables, constants, functions,
147 and typedef types. These are in the reverse of the order
148 supplied. There may be OVERLOADs on this list, too, but they
149 are wrapped in TREE_LISTs; the TREE_VALUE is the OVERLOAD. */
150 tree names;
151
152 /* Count of elements in names chain. */
153 size_t names_size;
154
155 /* A chain of NAMESPACE_DECL nodes. */
156 tree namespaces;
157
158 /* An array of static functions and variables (for namespaces only) */
159 varray_type static_decls;
160
161 /* A chain of VTABLE_DECL nodes. */
162 tree vtables;
163
164 /* A dictionary for looking up user-defined-types. */
165 binding_table type_decls;
166
167 /* A list of USING_DECL nodes. */
168 tree usings;
169
170 /* A list of used namespaces. PURPOSE is the namespace,
171 VALUE the common ancestor with this binding_level's namespace. */
172 tree using_directives;
173
174 /* If this binding level is the binding level for a class, then
175 class_shadowed is a TREE_LIST. The TREE_PURPOSE of each node
176 is the name of an entity bound in the class. The TREE_TYPE is
177 the DECL bound by this name in the class. */
178 tree class_shadowed;
179
180 /* Similar to class_shadowed, but for IDENTIFIER_TYPE_VALUE, and
181 is used for all binding levels. In addition the TREE_VALUE is the
182 IDENTIFIER_TYPE_VALUE before we entered the class. */
183 tree type_shadowed;
184
185 /* A TREE_LIST. Each TREE_VALUE is the LABEL_DECL for a local
186 label in this scope. The TREE_PURPOSE is the previous value of
187 the IDENTIFIER_LABEL VALUE. */
188 tree shadowed_labels;
189
190 /* For each level (except not the global one),
191 a chain of BLOCK nodes for all the levels
192 that were entered and exited one level down. */
193 tree blocks;
194
195 /* The entity (namespace, class, function) the scope of which this
196 binding contour corresponds to. Otherwise NULL. */
197 tree this_entity;
198
199 /* The binding level which this one is contained in (inherits from). */
200 struct cp_binding_level *level_chain;
201
202 /* List of VAR_DECLS saved from a previous for statement.
203 These would be dead in ISO-conforming code, but might
204 be referenced in ARM-era code. These are stored in a
205 TREE_LIST; the TREE_VALUE is the actual declaration. */
206 tree dead_vars_from_for;
207
208 /* Binding depth at which this level began. */
209 int binding_depth;
210
211 /* The kind of scope that this object represents. However, a
212 SK_TEMPLATE_SPEC scope is represented with KIND set to
213 SK_TEMPALTE_PARMS and EXPLICIT_SPEC_P set to true. */
214 ENUM_BITFIELD (scope_kind) kind : 4;
215
216 /* True if this scope is an SK_TEMPLATE_SPEC scope. This field is
217 only valid if KIND == SK_TEMPLATE_PARMS. */
218 BOOL_BITFIELD explicit_spec_p : 1;
219
220 /* true means make a BLOCK for this level regardless of all else. */
221 unsigned keep : 1;
222
223 /* Nonzero if this level can safely have additional
224 cleanup-needing variables added to it. */
225 unsigned more_cleanups_ok : 1;
226 unsigned have_cleanups : 1;
227
228 /* 22 bits left to fill a 32-bit word. */
229 };
当对象声明在类中时,这些对象还要记录在178行的class_shadowed及183行的type_shadowed中,以便能快速地退到上一级类环境。193行的blocks则指向构成该作用域中局部作用域的代码块。197行的this_entity则指向这个作用域所对应的实体(名字空间、类或函数)。
函数push_to_top_level把运行时环境切换回全局名字空间。在编译器不是处理函数定义时,全局变量scope_chain总是记录了当前绑定域。
push_to_top_level的4796行(代码见下文)用到的current_binding_level具有以下的定义。如果cfun非零,则表示正在处理一个函数定义,宏cp_function_chain访问的是cfun的language域;另一方面,由scope_chain维护其他的绑定上下文环境。
233 #define current_binding_level \ in name-lookup.h
234 (*(cfun && cp_function_chain->bindings \
235 ? &cp_function_chain->bindings \
236 : &scope_chain->bindings))
在编译的过程中,往往需要临时切换绑定域。比如,处理上面例子中第9行的“extern "C" int ci;”时,编译器首先需要回到全局名字空间,为ci创建相关的树节点,然后再回到原来的绑定域。在这个过程中,scope_chain亦用于记录暂时退出的作用域,而编译器从全局名字空间返回时,将依据其内容恢复原先的作用域。
4785 void
4786 push_to_top_level (void) in name-lookup.c
4787 {
4788 struct saved_scope *s;
4789 struct cp_binding_level *b;
4790 cxx_saved_binding *old_bindings;
4791 int need_pop;
4792
4793 timevar_push (TV_NAME_LOOKUP);
4794 s = ggc_alloc_cleared (sizeof (struct saved_scope));
4795
4796 b = scope_chain ? current_binding_level : 0;
4797
4798 /* If we're in the middle of some function, save our state. */
4799 if (cfun)
4800 {
4801 need_pop = 1;
4802 push_function_context_to (NULL_TREE);
4803 }
4804 else
4805 need_pop = 0;
4806
4807 old_bindings = NULL;
4808 if (scope_chain && previous_class_type)
4809 old_bindings = store_bindings (previous_class_values, old_bindings);
4810
4811 /* Have to include the global scope, because class-scope decls
4812 aren't listed anywhere useful. */
4813 for (; b; b = b->level_chain)
4814 {
4815 tree t;
4816
4817 /* Template IDs are inserted into the global level. If they were
4818 inserted into namespace level, finish_file wouldn't find them
4819 when doing pending instantiations. Therefore, don't stop at
4820 namespace level, but continue until :: . */
4821 if (global_scope_p (b))
4822 break;
4823
4824 old_bindings = store_bindings (b->names, old_bindings);
4825 /* We also need to check class_shadowed to save class-level type
4826 bindings, since pushclass doesn't fill in b->names. */
4827 if (b->kind == sk_class)
4828 old_bindings = store_bindings (b->class_shadowed, old_bindings);
4829
4830 /* Unwind type-value slots back to top level. */
4831 for (t = b->type_shadowed; t; t = TREE_CHAIN (t))
4832 SET_IDENTIFIER_TYPE_VALUE (TREE_PURPOSE (t), TREE_VALUE (t));
4833 }
4834 s->prev = scope_chain;
4835 s->old_bindings = old_bindings;
4836 s->bindings = b;
4837 s->need_pop_function_context = need_pop;
4838 s->function_decl = current_function_decl;
4839
4840 scope_chain = s;
4841 current_function_decl = NULL_TREE;
4842 VARRAY_TREE_INIT (current_lang_base, 10, "current_lang_base");
4843 current_lang_name = lang_name_cplusplus;
4844 current_namespace = global_namespace;
4845 timevar_pop (TV_NAME_LOOKUP);
4846 }
注意4794行,在创建这个全局名字空间时,编译器亦为之分配saved_scope实例,因此,scope_chain总是有一个空的saved_scope实例作为结尾。如果当前处理的是函数,4799行的cfun不为null,push_function_context_to把当前的函数上下文保存在outer_function_chain中,就仿佛我们进入了当前函数的嵌套函数中,这个环境亦会在恢复时重建。
前面已经知道,同名的声明都通过cxx_binding结构连接在一起。在后面会看到,当前编译器的实现会把已失效声明的cxx_binding实例从相应的链表中删除;而在进入新的作用域时,又将相应的cxx_binding对象链入。创建新的作用域不是一个廉价的操作。考虑到对于类的定义,几乎每个用到类的程序都会有如下类方法的定义:
void A::m1 () { ... }
// back to global namespace
void A::m2 () { ... }
// back to global namespace
....
在m1处理完成时,编译器将退回上一级作用域(通常是全局名字空间),而接下来对m2的处理,编译器又要压入A的作用域。考虑到A可能拥有多个方法,值得为之缓存作用域对象。在上面4808行的previous_class_type就指向了最近一次退出的最外层的类对象(这时当前作用域要么回到previous_class_type,要么是在previous_class_type作用域外的非类作用域,否则previous_class_type及previous_class_values都为null),而previous_class_values则指向其中的声明。
4813行的FOR循环将当前作用域至全局名字空间(不含)内所有的声明进行保存。4831行的type_shadowed记录了作用域b所屏蔽掉的声明,当退到上一级作用域时,需要恢复被屏蔽的声明(通过下面的SET_IDENTIFIER_TYPE_VALUE设置对应的类型)。
403 #define SET_IDENTIFIER_TYPE_VALUE(NODE,TYPE) (TREE_TYPE (NODE) = (TYPE))
函数store_bindings则把作用域中的声明对象保存入cxx_saved_binding链表中,这个链表最终在上面的4835行被存入新的saved_scope实例,并随之在4840行链入scope_chain。
4740 static cxx_saved_binding *
4741 store_bindings (tree names, cxx_saved_binding *old_bindings) in name-lookup.c
4742 {
4743 tree t;
4744 cxx_saved_binding *search_bindings = old_bindings;
4745
4746 timevar_push (TV_NAME_LOOKUP);
4747 for (t = names; t; t = TREE_CHAIN (t))
4748 {
4749 tree id;
4750 cxx_saved_binding *saved;
4751 cxx_saved_binding *t1;
4752
4753 if (TREE_CODE (t) == TREE_LIST)
4754 id = TREE_PURPOSE (t);
4755 else
4756 id = DECL_NAME (t);
4757
4758 if (!id
4759 /* Note that we may have an IDENTIFIER_CLASS_VALUE even when
4760 we have no IDENTIFIER_BINDING if we have left the class
4761 scope, but cached the class-level declarations. */
4762 || !(IDENTIFIER_BINDING (id) || IDENTIFIER_CLASS_VALUE (id)))
4763 continue;
4764
4765 for (t1 = search_bindings; t1; t1 = t1->previous)
4766 if (t1->identifier == id)
4767 goto skip_it;
4768
4769 my_friendly_assert (TREE_CODE (id) == IDENTIFIER_NODE, 135);
4770 saved = cxx_saved_binding_make ();
4771 saved->previous = old_bindings;
4772 saved->identifier = id;
4773 saved->binding = IDENTIFIER_BINDING (id);
4774 saved->class_value = IDENTIFIER_CLASS_VALUE (id);;
4775 saved->real_type_value = REAL_IDENTIFIER_TYPE_VALUE (id);
4776 IDENTIFIER_BINDING (id) = NULL;
4777 IDENTIFIER_CLASS_VALUE (id) = NULL_TREE;
4778 old_bindings = saved;
4779 skip_it:
4780 ;
4781 }
4782 POP_TIMEVAR_AND_RETURN (TV_NAME_LOOKUP, old_bindings);
4783 }
虽然,同一个名字的声明可以有多个,但是这个名字的标识符在编译过程中是唯一的。它所对应的对象由各自的作用域确定。4773行的id为标识符节点,通过IDENTIFIER_BINDING指向所有同名对象的cxx_binding所构成的链表中的某一节点。这一节点对应当前作用域中有效的对象,其previous域指向上级作用域中的对象。另外,如果当前作用域还是一个类绑定域,标识符还通过IDENTIFIER_CLASS_VALUE指向IDENTIFIER_BINDING所指向的cxx_binding节点。
在我们现在这个情形下,进入了全局名字空间,不需要从中返回(scope_chain因此具有一个空的节点,而不再为null)。但对于其它情形,需要从全局名字空间返回。这通过下面的函数来实现。
4848 void
4849 pop_from_top_level (void) in name-lookup.c
4850 {
4851 struct saved_scope *s = scope_chain;
4852 cxx_saved_binding *saved;
4853
4854 timevar_push (TV_NAME_LOOKUP);
4855 /* Clear out class-level bindings cache. */
4856 if (previous_class_type)
4857 invalidate_class_lookup_cache ();
4858
4859 current_lang_base = 0;
4860
4861 scope_chain = s->prev;
4862 for (saved = s->old_bindings; saved; saved = saved->previous)
4863 {
4864 tree id = saved->identifier;
4865
4866 IDENTIFIER_BINDING (id) = saved->binding;
4867 IDENTIFIER_CLASS_VALUE (id) = saved->class_value;
4868 SET_IDENTIFIER_TYPE_VALUE (id, saved->real_type_value);
4869 }
4870
4871 /* If we were in the middle of compiling a function, restore our
4872 state. */
4873 if (s->need_pop_function_context)
4874 pop_function_context_from (NULL_TREE);
4875 current_function_decl = s->function_decl;
4876 timevar_pop (TV_NAME_LOOKUP);
4877 }
在编译器根据需要临时退回全局名字空间,而后返回的情况下,previous_class_type可能已发生变化,也不再需要,在此清空这个缓存。不过其原始内容仍然会被上面4862行的FOR循环导入。
5548 void
5549 invalidate_class_lookup_cache (void) in class.c
5550 {
5551 tree t;
5552
5553 /* The IDENTIFIER_CLASS_VALUEs are no longer valid. */
5554 for (t = previous_class_values; t; t = TREE_CHAIN (t))
5555 IDENTIFIER_CLASS_VALUE (TREE_PURPOSE (t)) = NULL_TREE;
5556
5557 previous_class_values = NULL_TREE;
5558 previous_class_type = NULL_TREE;
5559 }