错误恢复是一个重要而又相当困难的议题。当遇到错误时,如何使解析器可以在错误点后的有效符号处重新解析,从来都不是一件简单的事。在C++的前端中,尝试性解析器提供了一个优雅的方法,当我们在尝试中发现了错误,则回滚进行下一个尝试。但是当所有的可能都尝试过而不能成功,我们不得不丢弃符号直到解析器可以重新开始。在C++的前端中,有几个函数用于此目的。第一个函数cp_parser_skip_to_closing_parenthesis将跳过符号直到找到非嵌套的右括号(“)”),或非嵌套的逗号(如果recovering和or_comma都是true),或者不在大括号内的分号,或者非嵌套的右大括号,或者到文件末尾。
/* Skip tokens while tracking `(' / `)' and `{' / `}' nesting, looking
   for a closing `)'.

   Returns 1 when a `)' is seen at paren depth 0 and brace depth 0; that
   `)' is consumed only if CONSUME_PAREN is true.
   Returns -1 when RECOVERING and OR_COMMA are both true and a `,' is
   seen at paren depth 0 and brace depth 0; the `,' is not consumed.
   Returns 0, leaving the stopping token unconsumed, on CPP_EOF, on a
   `;' at brace depth 0 (paren depth is not checked), or on a `}' at
   brace depth 0.
   Also returns 0 at once, consuming nothing, when RECOVERING is true,
   OR_COMMA is false, and the parser is parsing tentatively without
   having committed to the tentative parse.  */
2024 static int
2025 cp_parser_skip_to_closing_parenthesis (cp_parser *parser, in parser.c
2026 bool recovering,
2027 bool or_comma,
2028 bool consume_paren)
2029 {
2030 unsigned paren_depth = 0;
2031 unsigned brace_depth = 0;
2032
2033 if (recovering && !or_comma && cp_parser_parsing_tentatively (parser)
2034 && !cp_parser_committed_to_tentative_parse (parser))
2035 return 0;
2036
2037 while (true)
2038 {
2039 cp_token *token;
2040
2041 /* If we've run out of tokens, then there is no closing `)'. */
2042 if (cp_lexer_next_token_is (parser->lexer, CPP_EOF))
2043 return 0;
2044
2045 token = cp_lexer_peek_token (parser->lexer);
2046
2047 /* This matches the processing in skip_to_end_of_statement. */
2048 if (token->type == CPP_SEMICOLON && !brace_depth)
2049 return 0;
2050 if (token->type == CPP_OPEN_BRACE)
2051 ++brace_depth;
2052 if (token->type == CPP_CLOSE_BRACE)
2053 {
2054 if (!brace_depth--) /* A `}' with no `{' scanned here: stop before it. */
2055 return 0;
2056 }
2057 if (recovering && or_comma && token->type == CPP_COMMA
2058 && !brace_depth && !paren_depth)
2059 return -1;
2060
2061 if (!brace_depth)
2062 {
2063 /* If it is an `(', we have entered another level of nesting. */
2064 if (token->type == CPP_OPEN_PAREN)
2065 ++paren_depth;
2066 /* If it is a `)', then we might be done: a `)' seen at paren depth 0
   ends the scan, any other `)' just closes one nesting level. */
2067 else if (token->type == CPP_CLOSE_PAREN && !paren_depth--)
2068 {
2069 if (consume_paren)
2070 cp_lexer_consume_token (parser->lexer);
2071 return 1;
2072 }
2073 }
2074
2075 /* Consume the token. */
2076 cp_lexer_consume_token (parser->lexer);
2077 }
2078 }
从上面的代码中看到,所谓非嵌套,举例言之,{ .. ( .. ( .., ); .. ), ( ..;);}之中黑体且有下划线的符号即是非嵌套者。参数recovering如果是true,表示我们正在进行错误恢复;如果or_comma是false,表示我们不考虑非嵌套逗号,也即我们要跑到非嵌套的分号,右括号,或右大括号处,其中分号及右大括号都意味着语句的结束。这样,如果我们之前在尝试性解析之中,在开始解析前,前端会向延迟访问检查栈压入保存该次解析中可能出现的延迟访问检查的节点,在错误恢复之前(不考虑非嵌套逗号),必须确保尝试性解析已经安全停止(调用cp_parser_parse_definitely,但它只会移除当前解析的上下文,不确保清空上下文列表),否则我们没有机会恢复延迟访问检查栈。条件 !cp_parser_parsing_tentatively (parser) || cp_parser_committed_to_tentative_parse (parser) 对于已停止(已提交)的尝试性解析或非尝试性解析成立。对于该条件不能成立的情况(即2033–2034行的判断为真:正在进行错误恢复、不考虑逗号、且处于尚未提交的尝试性解析之中),函数直接返回0而不跳过任何符号,我们宁愿不处理,把它留给前端。
可以想见cp_parser_skip_to_closing_parenthesis更多时候是跑到右括号处。为了确保跳过一个语句,可以使用cp_parser_skip_to_end_of_statement。这里不考虑尝试性解析是否已经停止。这是因为,调用这个函数时,解析器已经没有办法再继续解析当前的语句,必须无条件执行,而且通常这时前端已经停止了尝试性解析。
/* Skip tokens until the end of the current statement, tracking `{' /
   `}' nesting.

   Stops without consuming the stopping token at CPP_EOF, at a `;' seen
   at nesting depth 0, or at a `}' seen at nesting depth 0.  Stops just
   after consuming the `}' that brings the nesting depth back to 0,
   i.e. the one closing the outermost `{' scanned here.  */
2084 static void
2085 cp_parser_skip_to_end_of_statement (cp_parser* parser) in parser.c
2086 {
2087 unsigned nesting_depth = 0;
2088
2089 while (true)
2090 {
2091 cp_token *token;
2092
2093 /* Peek at the next token. */
2094 token = cp_lexer_peek_token (parser->lexer);
2095 /* If we've run out of tokens, stop. */
2096 if (token->type == CPP_EOF)
2097 break;
2098 /* If the next token is a `;', we have reached the end of the
2099 statement. */
2100 if (token->type == CPP_SEMICOLON && !nesting_depth)
2101 break;
2102 /* If the next token is a non-nested `}', then we have reached
2103 the end of the current block. */
2104 if (token->type == CPP_CLOSE_BRACE)
2105 {
2106 /* If this is a non-nested `}', stop before consuming it.
2107 That way, when confronted with something like:
2108
2109 { 3 + }
2110
2111 we stop before consuming the closing `}', even though we
2112 have not yet reached a `;'. */
2113 if (nesting_depth == 0)
2114 break;
2115 /* If it is the closing `}' for a block that we have
2116 scanned, stop -- but only after consuming the token.
2117 That way given:
2118
2119 void f g () { ... }
2120 typedef int I;
2121
2122 we will stop after the body of the erroneously declared
2123 function, but before consuming the following `typedef'
2124 declaration. */
2125 if (--nesting_depth == 0)
2126 {
2127 cp_lexer_consume_token (parser->lexer);
2128 break;
2129 }
2130 }
2131 /* If the next token is a `{', then we are entering a new
2132 block. Consume the entire block. */
2133 else if (token->type == CPP_OPEN_BRACE)
2134 ++nesting_depth;
2135 /* Consume the token. */
2136 cp_lexer_consume_token (parser->lexer);
2137 }
2138 }
接下来,cp_parser_skip_to_end_of_block_or_statement也是类似的,不过它瞄准以分号结尾的语句块,如DO..WHILE循环。因此它仅考虑非嵌套分号,以及非嵌套的右大括号。
/* Skip tokens until the end of the current statement or block,
   tracking `{' / `}' nesting.

   Stops without consuming anything further at CPP_EOF.  Otherwise
   stops just after consuming a `;' seen at nesting depth 0, or just
   after consuming a `}' that is either unmatched (depth already 0) or
   brings the nesting depth back to 0.  */
2162 static void
2163 cp_parser_skip_to_end_of_block_or_statement (cp_parser* parser) in parser.c
2164 {
2165 unsigned nesting_depth = 0;
2166
2167 while (true)
2168 {
2169 cp_token *token;
2170
2171 /* Peek at the next token. */
2172 token = cp_lexer_peek_token (parser->lexer);
2173 /* If we've run out of tokens, stop. */
2174 if (token->type == CPP_EOF)
2175 break;
2176 /* If the next token is a `;', we have reached the end of the
2177 statement. */
2178 if (token->type == CPP_SEMICOLON && !nesting_depth)
2179 {
2180 /* Consume the `;'. */
2181 cp_lexer_consume_token (parser->lexer);
2182 break;
2183 }
2184 /* Consume the token. */
2185 token = cp_lexer_consume_token (parser->lexer);
2186 /* If the token just consumed is a non-nested `}', or the `}' that
2187 closes the outermost block scanned here, we are done. */
2188 if (token->type == CPP_CLOSE_BRACE
2189 && (nesting_depth == 0 || --nesting_depth == 0))
2190 break;
2191 /* If the token just consumed is a `{', then we are entering a new
2192 block. Consume the entire block. */
2193 if (token->type == CPP_OPEN_BRACE)
2194 ++nesting_depth;
2195 }
2196 }
毫不奇怪,还有一个函数专用于像FOR循环这样的不以分号结尾的语句块。
/* Skip tokens until a `}' at nesting depth 0 is the next token,
   tracking `{' / `}' nesting.

   Stops without consuming the stopping token, either at that `}' or
   at CPP_EOF.  */
2201 static void
2202 cp_parser_skip_to_closing_brace (cp_parser *parser) in parser.c
2203 {
2204 unsigned nesting_depth = 0;
2205
2206 while (true)
2207 {
2208 cp_token *token;
2209
2210 /* Peek at the next token. */
2211 token = cp_lexer_peek_token (parser->lexer);
2212 /* If we've run out of tokens, stop. */
2213 if (token->type == CPP_EOF)
2214 break;
2215 /* If the next token is a non-nested `}', then we have reached
2216 the end of the current block.  Any other `}' closes one nesting
   level (the post-decrement runs for every `}'). */
2217 if (token->type == CPP_CLOSE_BRACE && nesting_depth-- == 0)
2218 break;
2219 /* If the next token is a `{', then we are entering a new
2220 block. Consume the entire block. */
2221 else if (token->type == CPP_OPEN_BRACE)
2222 ++nesting_depth;
2223 /* Consume the token. */
2224 cp_lexer_consume_token (parser->lexer);
2225 }
2226 }
有些时候,这些方法丢弃了太多的符号,更有甚者,它们可能是不合适的。比如在处理模板声明时,在其尖括号对(< >)中发生了错误,前进到右括号是不正确的,但前进至语句结尾则丢掉了太多的符号。最好是只丢掉直到“>”之前的符号。为了顾及这些情况, cp_parser_skip_until_found 将在指定符号处停住。
/* Require a token of kind TYPE; if it is not the next token, skip
   ahead looking for one.

   First calls cp_parser_require with TYPE and TOKEN_DESC and returns
   at once if that yields a token.  Otherwise skips tokens, counting
   `{', `(' and `[' as one shared nesting depth, until either:
     - a token of kind TYPE is seen at nesting depth 0, which is
       consumed before returning;
     - CPP_EOF is reached; or
     - a `}', `)' or `]' is seen at nesting depth 0, which is left
       unconsumed.  */
15109 static void
15110 cp_parser_skip_until_found (cp_parser* parser, in parser.c
15111 enum cpp_ttype type,
15112 const char* token_desc)
15113 {
15114 cp_token *token;
15115 unsigned nesting_depth = 0;
15116
15117 if (cp_parser_require (parser, type, token_desc))
15118 return;
15119
15120 /* Skip tokens until the desired token is found. */
15121 while (true)
15122 {
15123 /* Peek at the next token. */
15124 token = cp_lexer_peek_token (parser->lexer);
15125 /* If we've reached the token we want, consume it and
15126 stop. */
15127 if (token->type == type && !nesting_depth)
15128 {
15129 cp_lexer_consume_token (parser->lexer);
15130 return;
15131 }
15132 /* If we've run out of tokens, stop. */
15133 if (token->type == CPP_EOF)
15134 return;
/* Any opening bracket enters one more nesting level; the three
   bracket kinds are not distinguished from one another. */
15135 if (token->type == CPP_OPEN_BRACE
15136 || token->type == CPP_OPEN_PAREN
15137 || token->type == CPP_OPEN_SQUARE)
15138 ++nesting_depth;
15139 else if (token->type == CPP_CLOSE_BRACE
15140 || token->type == CPP_CLOSE_PAREN
15141 || token->type == CPP_CLOSE_SQUARE)
15142 {
15143 if (nesting_depth-- == 0) /* Unmatched closer: stop before it. */
15144 return;
15145 }
15146 /* Consume this token. */
15147 cp_lexer_consume_token (parser->lexer);
15148 }
15149 }
而且,保险起见,当遇到非嵌套的右大括号、右括号或右方括号(它们可能标记着语句或外层结构的结尾),或者到达文件末尾时,该函数也会退出,并且不消化这个符号。
解析器有数以十计的辅助函数,它们帮助解析器提取及验证符号。例程cp_lexer_next_token_is可以告知符号是否为期望的类型。
/* Return true if the next token in LEXER has kind TYPE.  The token is
   only peeked at, not consumed.  */
667 static bool
668 cp_lexer_next_token_is (cp_lexer* lexer, enum cpp_ttype type) in parser.c
669 {
670 cp_token *token;
671
672 /* Peek at the next token. */
673 token = cp_lexer_peek_token (lexer);
674 /* Check to see if it has the indicated TYPE. */
675 return token->type == type;
676 }
例程cp_parser_require如果发现下一个符号是期望的类型,就“消化”它并返回该符号;否则发出错误消息(在尝试性解析中只是模拟错误而不输出消息),并返回NULL。
/* If the next token has kind TYPE, consume it and return it.
   Otherwise return NULL without consuming anything; in that case,
   unless cp_parser_simulate_error returns true, report the error
   "expected " followed by TOKEN_DESC through cp_parser_error.  */
15085 static cp_token *
15086 cp_parser_require (cp_parser* parser, in parser.c
15087 enum cpp_ttype type,
15088 const char* token_desc)
15089 {
15090 if (cp_lexer_next_token_is (parser->lexer, type))
15091 return cp_lexer_consume_token (parser->lexer);
15092 else
15093 {
15094 /* Output the MESSAGE -- unless we're parsing tentatively. */
15095 if (!cp_parser_simulate_error (parser))
15096 {
/* NOTE(review): concat presumably returns freshly allocated storage
   (the trailing NULL looks like an argument-list terminator); the
   result is released with free below -- confirm against libiberty. */
15097 char *message = concat ("expected ", token_desc, NULL);
15098 cp_parser_error (parser, message);
15099 free (message);
15100 }
15101 return NULL;
15102 }
15103 }
例程cp_parser_require_keyword类似于cp_parser_require,不过,期望的符号是语言的关键字。
/* Require the next token to be the keyword KEYWORD.

   Calls cp_parser_require with CPP_KEYWORD and TOKEN_DESC.  Returns
   the token when it is a keyword token whose keyword equals KEYWORD.
   Returns NULL when cp_parser_require returns NULL, or when the token
   it returned is a different keyword; in the latter case the error
   "expected " followed by TOKEN_DESC is reported here.  Note that in
   the wrong-keyword case the token has already been obtained from
   cp_parser_require, so it is not left in the stream.  */
15157 static cp_token *
15158 cp_parser_require_keyword (cp_parser* parser, in parser.c
15159 enum rid keyword,
15160 const char* token_desc)
15161 {
15162 cp_token *token = cp_parser_require (parser, CPP_KEYWORD, token_desc);
15163
15164 if (token && token->keyword != keyword)
15165 {
15166 dyn_string_t error_msg;
15167
15168 /* Format the error message. */
15169 error_msg = dyn_string_new (0);
15170 dyn_string_append_cstr (error_msg, "expected ");
15171 dyn_string_append_cstr (error_msg, token_desc);
15172 cp_parser_error (parser, error_msg->s);
15173 dyn_string_delete (error_msg);
15174 return NULL;
15175 }
15176
15177 return token;
15178 }
与cp_lexer_next_token_is类似的另一个函数是cp_lexer_next_token_is_keyword,它只查看下一个符号是否为指定的关键字,并不消化它。
/* Return true if the keyword field of the next token in LEXER equals
   KEYWORD.  The token is only peeked at, not consumed.
   NOTE(review): the token kind is not checked here; presumably tokens
   that are not keywords carry a keyword value that never equals a real
   KEYWORD -- confirm against the lexer.  */
688 static bool
689 cp_lexer_next_token_is_keyword (cp_lexer* lexer, enum rid keyword) in parser.c
690 {
691 cp_token *token;
692
693 /* Peek at the next token. */
694 token = cp_lexer_peek_token (lexer);
695 /* Check to see if it is the indicated keyword. */
696 return token->keyword == keyword;
697 }
例程cp_parser_identifier则期望标识符(identifier)。
/* Require a CPP_NAME token, described as "identifier" in any error
   message.  Returns the value field of the token obtained from
   cp_parser_require, or error_mark_node when cp_parser_require
   returns NULL.  */
2299 static tree
2300 cp_parser_identifier (cp_parser* parser) in parser.c
2301 {
2302 cp_token *token;
2303
2304 /* Look for the identifier. */
2305 token = cp_parser_require (parser, CPP_NAME, "identifier");
2306 /* Return the value. */
2307 return token ? token->value : error_mark_node;
2308 }