Vczh Library++3.0之正则表达式引擎(使用Visitor模式访问语法树)
虽然说分析语法树依赖于递归,但是真的去写递归是一件很烦的事情。个人觉得烦就烦在每次都要去RegexExpression.h里面声明所有的虚函数,然后再复制到.cpp文件里一一实现。有没有办法仅编辑.cpp文件就能做到呢?也就是说,如何在不修改Expression一系列类的接口的情况下给Expression添加算法?一般来说对付树形结构都是使用Visitor模式的。
首先,针对上一篇文章定义的语法树,我们需要设计一个通用的Visitor接口:
1
class
IRegexExpressionAlgorithm :
public
Interface
2 {
3 public :
4 virtual void Visit(CharSetExpression * expression) = 0 ;
5 virtual void Visit(LoopExpression * expression) = 0 ;
6 virtual void Visit(SequenceExpression * expression) = 0 ;
7 virtual void Visit(AlternateExpression * expression) = 0 ;
8 virtual void Visit(BeginExpression * expression) = 0 ;
9 virtual void Visit(EndExpression * expression) = 0 ;
10 virtual void Visit(CaptureExpression * expression) = 0 ;
11 virtual void Visit(MatchExpression * expression) = 0 ;
12 virtual void Visit(PositiveExpression * expression) = 0 ;
13 virtual void Visit(NegativeExpression * expression) = 0 ;
14 virtual void Visit(UsingExpression * expression) = 0 ;
15 };
2 {
3 public :
4 virtual void Visit(CharSetExpression * expression) = 0 ;
5 virtual void Visit(LoopExpression * expression) = 0 ;
6 virtual void Visit(SequenceExpression * expression) = 0 ;
7 virtual void Visit(AlternateExpression * expression) = 0 ;
8 virtual void Visit(BeginExpression * expression) = 0 ;
9 virtual void Visit(EndExpression * expression) = 0 ;
10 virtual void Visit(CaptureExpression * expression) = 0 ;
11 virtual void Visit(MatchExpression * expression) = 0 ;
12 virtual void Visit(PositiveExpression * expression) = 0 ;
13 virtual void Visit(NegativeExpression * expression) = 0 ;
14 virtual void Visit(UsingExpression * expression) = 0 ;
15 };
接口定义好了之后,就给所有的表达式类添加一个Apply(虚)函数来调用相应的Visit函数:
1
void
CharSetExpression::Apply(IRegexExpressionAlgorithm
&
algorithm)
2 {
3 algorithm.Visit( this );
4 }
5
6 void LoopExpression::Apply(IRegexExpressionAlgorithm & algorithm)
7 {
8 algorithm.Visit( this );
9 }
10
11 void SequenceExpression::Apply(IRegexExpressionAlgorithm & algorithm)
12 {
13 algorithm.Visit( this );
14 }
15
16 void AlternateExpression::Apply(IRegexExpressionAlgorithm & algorithm)
17 {
18 algorithm.Visit( this );
19 }
20
21 void BeginExpression::Apply(IRegexExpressionAlgorithm & algorithm)
22 {
23 algorithm.Visit( this );
24 }
25
26 void EndExpression::Apply(IRegexExpressionAlgorithm & algorithm)
27 {
28 algorithm.Visit( this );
29 }
30
31 void CaptureExpression::Apply(IRegexExpressionAlgorithm & algorithm)
32 {
33 algorithm.Visit( this );
34 }
35
36 void MatchExpression::Apply(IRegexExpressionAlgorithm & algorithm)
37 {
38 algorithm.Visit( this );
39 }
40
41 void PositiveExpression::Apply(IRegexExpressionAlgorithm & algorithm)
42 {
43 algorithm.Visit( this );
44 }
45
46 void NegativeExpression::Apply(IRegexExpressionAlgorithm & algorithm)
47 {
48 algorithm.Visit( this );
49 }
50
51 void UsingExpression::Apply(IRegexExpressionAlgorithm & algorithm)
52 {
53 algorithm.Visit( this );
54 }
2 {
3 algorithm.Visit( this );
4 }
5
6 void LoopExpression::Apply(IRegexExpressionAlgorithm & algorithm)
7 {
8 algorithm.Visit( this );
9 }
10
11 void SequenceExpression::Apply(IRegexExpressionAlgorithm & algorithm)
12 {
13 algorithm.Visit( this );
14 }
15
16 void AlternateExpression::Apply(IRegexExpressionAlgorithm & algorithm)
17 {
18 algorithm.Visit( this );
19 }
20
21 void BeginExpression::Apply(IRegexExpressionAlgorithm & algorithm)
22 {
23 algorithm.Visit( this );
24 }
25
26 void EndExpression::Apply(IRegexExpressionAlgorithm & algorithm)
27 {
28 algorithm.Visit( this );
29 }
30
31 void CaptureExpression::Apply(IRegexExpressionAlgorithm & algorithm)
32 {
33 algorithm.Visit( this );
34 }
35
36 void MatchExpression::Apply(IRegexExpressionAlgorithm & algorithm)
37 {
38 algorithm.Visit( this );
39 }
40
41 void PositiveExpression::Apply(IRegexExpressionAlgorithm & algorithm)
42 {
43 algorithm.Visit( this );
44 }
45
46 void NegativeExpression::Apply(IRegexExpressionAlgorithm & algorithm)
47 {
48 algorithm.Visit( this );
49 }
50
51 void UsingExpression::Apply(IRegexExpressionAlgorithm & algorithm)
52 {
53 algorithm.Visit( this );
54 }
于是我们可以去实现一个IRegexExpressionAlgorithm了。但是事情还没完。如果每一个算法都直接去实现IRegexExpressionAlgorithm的话,我们会发现因为各个算法所需要的参数不同,而Visit函数除了表达式本身之外不带任何额外的参数,所以每一个具体的算法都要自己做一次参数的缓存工作。参数的类型是未知的,而模板函数又不能是虚函数(所以不能把Expression::Apply写成模板函数),因此IRegexExpressionAlgorithm的Visit系列函数没有办法直接携带算法的参数。于是一个辅助类就应运而生了。
这个辅助类可以让你很直接地写出带有一个参数的算法。你只要继承它,然后写完所有的(算法类里面的)Apply函数就行了。那么怎么在一个表达式上面调用你自己的算法呢?假设参数是p,我们希望只需要简单地执行Invoke(expression, p)就可以调用到对应的Apply函数。所以这里实现了一个RegexExpressionAlgorithm<ReturnType, ParameterType>。当然对于ReturnType==void的情况,我们还需要再特化一个,不过这个就不说了:
1
template
<
typename ReturnType, typename ParameterType
=
void
*>
2 class RegexExpressionAlgorithm : public Object, public IRegexExpressionAlgorithm
3 {
4 private :
5 ReturnType returnValue;
6 void * parameterValue;
7 public :
8
9 ReturnType Invoke(Expression * expression, ParameterType parameter)
10 {
11 parameterValue = ( void * ) & parameter;
12 expression -> Apply( * this );
13 return returnValue;
14 }
15
16 ReturnType Invoke(Expression::Ref expression, ParameterType parameter)
17 {
18 parameterValue = ( void * ) & parameter;
19 expression -> Apply( * this );
20 return returnValue;
21 }
22
23 virtual ReturnType Apply(CharSetExpression * expression, ParameterType parameter) = 0 ;
24 virtual ReturnType Apply(LoopExpression * expression, ParameterType parameter) = 0 ;
25 virtual ReturnType Apply(SequenceExpression * expression, ParameterType parameter) = 0 ;
26 virtual ReturnType Apply(AlternateExpression * expression, ParameterType parameter) = 0 ;
27 virtual ReturnType Apply(BeginExpression * expression, ParameterType parameter) = 0 ;
28 virtual ReturnType Apply(EndExpression * expression, ParameterType parameter) = 0 ;
29 virtual ReturnType Apply(CaptureExpression * expression, ParameterType parameter) = 0 ;
30 virtual ReturnType Apply(MatchExpression * expression, ParameterType parameter) = 0 ;
31 virtual ReturnType Apply(PositiveExpression * expression, ParameterType parameter) = 0 ;
32 virtual ReturnType Apply(NegativeExpression * expression, ParameterType parameter) = 0 ;
33 virtual ReturnType Apply(UsingExpression * expression, ParameterType parameter) = 0 ;
34 public :
35 void Visit(CharSetExpression * expression)
36 {
37 returnValue = Apply(expression, * ((ParameterType * )parameterValue));
38 }
39
40 void Visit(LoopExpression * expression)
41 {
42 returnValue = Apply(expression, * ((ParameterType * )parameterValue));
43 }
44
45 void Visit(SequenceExpression * expression)
46 {
47 returnValue = Apply(expression, * ((ParameterType * )parameterValue));
48 }
49
50 void Visit(AlternateExpression * expression)
51 {
52 returnValue = Apply(expression, * ((ParameterType * )parameterValue));
53 }
54
55 void Visit(BeginExpression * expression)
56 {
57 returnValue = Apply(expression, * ((ParameterType * )parameterValue));
58 }
59
60 void Visit(EndExpression * expression)
61 {
62 returnValue = Apply(expression, * ((ParameterType * )parameterValue));
63 }
64
65 void Visit(CaptureExpression * expression)
66 {
67 returnValue = Apply(expression, * ((ParameterType * )parameterValue));
68 }
69
70 void Visit(MatchExpression * expression)
71 {
72 returnValue = Apply(expression, * ((ParameterType * )parameterValue));
73 }
74
75 void Visit(PositiveExpression * expression)
76 {
77 returnValue = Apply(expression, * ((ParameterType * )parameterValue));
78 }
79
80 void Visit(NegativeExpression * expression)
81 {
82 returnValue = Apply(expression, * ((ParameterType * )parameterValue));
83 }
84
85 void Visit(UsingExpression * expression)
86 {
87 returnValue = Apply(expression, * ((ParameterType * )parameterValue));
88 }
89 };
2 class RegexExpressionAlgorithm : public Object, public IRegexExpressionAlgorithm
3 {
4 private :
5 ReturnType returnValue;
6 void * parameterValue;
7 public :
8
9 ReturnType Invoke(Expression * expression, ParameterType parameter)
10 {
11 parameterValue = ( void * ) & parameter;
12 expression -> Apply( * this );
13 return returnValue;
14 }
15
16 ReturnType Invoke(Expression::Ref expression, ParameterType parameter)
17 {
18 parameterValue = ( void * ) & parameter;
19 expression -> Apply( * this );
20 return returnValue;
21 }
22
23 virtual ReturnType Apply(CharSetExpression * expression, ParameterType parameter) = 0 ;
24 virtual ReturnType Apply(LoopExpression * expression, ParameterType parameter) = 0 ;
25 virtual ReturnType Apply(SequenceExpression * expression, ParameterType parameter) = 0 ;
26 virtual ReturnType Apply(AlternateExpression * expression, ParameterType parameter) = 0 ;
27 virtual ReturnType Apply(BeginExpression * expression, ParameterType parameter) = 0 ;
28 virtual ReturnType Apply(EndExpression * expression, ParameterType parameter) = 0 ;
29 virtual ReturnType Apply(CaptureExpression * expression, ParameterType parameter) = 0 ;
30 virtual ReturnType Apply(MatchExpression * expression, ParameterType parameter) = 0 ;
31 virtual ReturnType Apply(PositiveExpression * expression, ParameterType parameter) = 0 ;
32 virtual ReturnType Apply(NegativeExpression * expression, ParameterType parameter) = 0 ;
33 virtual ReturnType Apply(UsingExpression * expression, ParameterType parameter) = 0 ;
34 public :
35 void Visit(CharSetExpression * expression)
36 {
37 returnValue = Apply(expression, * ((ParameterType * )parameterValue));
38 }
39
40 void Visit(LoopExpression * expression)
41 {
42 returnValue = Apply(expression, * ((ParameterType * )parameterValue));
43 }
44
45 void Visit(SequenceExpression * expression)
46 {
47 returnValue = Apply(expression, * ((ParameterType * )parameterValue));
48 }
49
50 void Visit(AlternateExpression * expression)
51 {
52 returnValue = Apply(expression, * ((ParameterType * )parameterValue));
53 }
54
55 void Visit(BeginExpression * expression)
56 {
57 returnValue = Apply(expression, * ((ParameterType * )parameterValue));
58 }
59
60 void Visit(EndExpression * expression)
61 {
62 returnValue = Apply(expression, * ((ParameterType * )parameterValue));
63 }
64
65 void Visit(CaptureExpression * expression)
66 {
67 returnValue = Apply(expression, * ((ParameterType * )parameterValue));
68 }
69
70 void Visit(MatchExpression * expression)
71 {
72 returnValue = Apply(expression, * ((ParameterType * )parameterValue));
73 }
74
75 void Visit(PositiveExpression * expression)
76 {
77 returnValue = Apply(expression, * ((ParameterType * )parameterValue));
78 }
79
80 void Visit(NegativeExpression * expression)
81 {
82 returnValue = Apply(expression, * ((ParameterType * )parameterValue));
83 }
84
85 void Visit(UsingExpression * expression)
86 {
87 returnValue = Apply(expression, * ((ParameterType * )parameterValue));
88 }
89 };
好了,让我们使用它来实现之前提到过的IsEqual功能吧。首先实现一个IsEqualAlgorithm:
1
class
IsEqualAlgorithm :
public
RegexExpressionAlgorithm
<
bool
, Expression
*>
2 {
3 public :
4 bool Apply(CharSetExpression * expression, Expression * target)
5 {
6 CharSetExpression * expected = dynamic_cast < CharSetExpression *> (target);
7 if (expected)
8 {
9 if (expression -> reverse != expected -> reverse) return false ;
10 if (expression -> ranges.Count() != expected -> ranges.Count()) return false ;
11 for ( int i = 0 ;i < expression -> ranges.Count();i ++ )
12 {
13 if (expression -> ranges[i] != expected -> ranges[i]) return false ;
14 }
15 return true ;
16 }
17 return false ;
18 }
19
20 bool Apply(LoopExpression * expression, Expression * target)
21 {
22 LoopExpression * expected = dynamic_cast < LoopExpression *> (target);
23 if (expected)
24 {
25 if (expression -> min != expected -> min) return false ;
26 if (expression -> max != expected -> max) return false ;
27 if ( ! Invoke(expression -> expression, expected -> expression.Obj())) return false ;
28 return true ;
29 }
30 return false ;
31 }
32
33 bool Apply(SequenceExpression * expression, Expression * target)
34 {
35 SequenceExpression * expected = dynamic_cast < SequenceExpression *> (target);
36 if (expected)
37 {
38 if ( ! Invoke(expression -> left, expected -> left.Obj())) return false ;
39 if ( ! Invoke(expression -> right, expected -> right.Obj())) return false ;
40 return true ;
41 }
42 return false ;
43 }
44
45 bool Apply(AlternateExpression * expression, Expression * target)
46 {
47 AlternateExpression * expected = dynamic_cast < AlternateExpression *> (target);
48 if (expected)
49 {
50 if ( ! Invoke(expression -> left, expected -> left.Obj())) return false ;
51 if ( ! Invoke(expression -> right, expected -> right.Obj())) return false ;
52 return true ;
53 }
54 return false ;
55 }
56
57 bool Apply(BeginExpression * expression, Expression * target)
58 {
59 BeginExpression * expected = dynamic_cast < BeginExpression *> (target);
60 if (expected)
61 {
62 return true ;
63 }
64 return false ;
65 }
66
67 bool Apply(EndExpression * expression, Expression * target)
68 {
69 EndExpression * expected = dynamic_cast < EndExpression *> (target);
70 if (expected)
71 {
72 return true ;
73 }
74 return false ;
75 }
76
77 bool Apply(CaptureExpression * expression, Expression * target)
78 {
79 CaptureExpression * expected = dynamic_cast < CaptureExpression *> (target);
80 if (expected)
81 {
82 if (expression -> name != expected -> name) return false ;
83 if ( ! Invoke(expression -> expression, expected -> expression.Obj())) return false ;
84 return true ;
85 }
86 return false ;
87 }
88
89 bool Apply(MatchExpression * expression, Expression * target)
90 {
91 MatchExpression * expected = dynamic_cast < MatchExpression *> (target);
92 if (expected)
93 {
94 if (expression -> name != expected -> name) return false ;
95 if (expression -> index != expected -> index) return false ;
96 return true ;
97 }
98 return false ;
99 }
100
101 bool Apply(PositiveExpression * expression, Expression * target)
102 {
103 PositiveExpression * expected = dynamic_cast < PositiveExpression *> (target);
104 if (expected)
105 {
106 if ( ! Invoke(expression -> expression, expected -> expression.Obj())) return false ;
107 return true ;
108 }
109 return false ;
110 }
111
112 bool Apply(NegativeExpression * expression, Expression * target)
113 {
114 NegativeExpression * expected = dynamic_cast < NegativeExpression *> (target);
115 if (expected)
116 {
117 if ( ! Invoke(expression -> expression, expected -> expression.Obj())) return false ;
118 return true ;
119 }
120 return false ;
121 }
122
123 bool Apply(UsingExpression * expression, Expression * target)
124 {
125 UsingExpression * expected = dynamic_cast < UsingExpression *> (target);
126 if (expected)
127 {
128 if (expression -> name != expected -> name) return false ;
129 return true ;
130 }
131 return false ;
132 }
133 };
2 {
3 public :
4 bool Apply(CharSetExpression * expression, Expression * target)
5 {
6 CharSetExpression * expected = dynamic_cast < CharSetExpression *> (target);
7 if (expected)
8 {
9 if (expression -> reverse != expected -> reverse) return false ;
10 if (expression -> ranges.Count() != expected -> ranges.Count()) return false ;
11 for ( int i = 0 ;i < expression -> ranges.Count();i ++ )
12 {
13 if (expression -> ranges[i] != expected -> ranges[i]) return false ;
14 }
15 return true ;
16 }
17 return false ;
18 }
19
20 bool Apply(LoopExpression * expression, Expression * target)
21 {
22 LoopExpression * expected = dynamic_cast < LoopExpression *> (target);
23 if (expected)
24 {
25 if (expression -> min != expected -> min) return false ;
26 if (expression -> max != expected -> max) return false ;
27 if ( ! Invoke(expression -> expression, expected -> expression.Obj())) return false ;
28 return true ;
29 }
30 return false ;
31 }
32
33 bool Apply(SequenceExpression * expression, Expression * target)
34 {
35 SequenceExpression * expected = dynamic_cast < SequenceExpression *> (target);
36 if (expected)
37 {
38 if ( ! Invoke(expression -> left, expected -> left.Obj())) return false ;
39 if ( ! Invoke(expression -> right, expected -> right.Obj())) return false ;
40 return true ;
41 }
42 return false ;
43 }
44
45 bool Apply(AlternateExpression * expression, Expression * target)
46 {
47 AlternateExpression * expected = dynamic_cast < AlternateExpression *> (target);
48 if (expected)
49 {
50 if ( ! Invoke(expression -> left, expected -> left.Obj())) return false ;
51 if ( ! Invoke(expression -> right, expected -> right.Obj())) return false ;
52 return true ;
53 }
54 return false ;
55 }
56
57 bool Apply(BeginExpression * expression, Expression * target)
58 {
59 BeginExpression * expected = dynamic_cast < BeginExpression *> (target);
60 if (expected)
61 {
62 return true ;
63 }
64 return false ;
65 }
66
67 bool Apply(EndExpression * expression, Expression * target)
68 {
69 EndExpression * expected = dynamic_cast < EndExpression *> (target);
70 if (expected)
71 {
72 return true ;
73 }
74 return false ;
75 }
76
77 bool Apply(CaptureExpression * expression, Expression * target)
78 {
79 CaptureExpression * expected = dynamic_cast < CaptureExpression *> (target);
80 if (expected)
81 {
82 if (expression -> name != expected -> name) return false ;
83 if ( ! Invoke(expression -> expression, expected -> expression.Obj())) return false ;
84 return true ;
85 }
86 return false ;
87 }
88
89 bool Apply(MatchExpression * expression, Expression * target)
90 {
91 MatchExpression * expected = dynamic_cast < MatchExpression *> (target);
92 if (expected)
93 {
94 if (expression -> name != expected -> name) return false ;
95 if (expression -> index != expected -> index) return false ;
96 return true ;
97 }
98 return false ;
99 }
100
101 bool Apply(PositiveExpression * expression, Expression * target)
102 {
103 PositiveExpression * expected = dynamic_cast < PositiveExpression *> (target);
104 if (expected)
105 {
106 if ( ! Invoke(expression -> expression, expected -> expression.Obj())) return false ;
107 return true ;
108 }
109 return false ;
110 }
111
112 bool Apply(NegativeExpression * expression, Expression * target)
113 {
114 NegativeExpression * expected = dynamic_cast < NegativeExpression *> (target);
115 if (expected)
116 {
117 if ( ! Invoke(expression -> expression, expected -> expression.Obj())) return false ;
118 return true ;
119 }
120 return false ;
121 }
122
123 bool Apply(UsingExpression * expression, Expression * target)
124 {
125 UsingExpression * expected = dynamic_cast < UsingExpression *> (target);
126 if (expected)
127 {
128 if (expression -> name != expected -> name) return false ;
129 return true ;
130 }
131 return false ;
132 }
133 };
譬如看AlternateExpression的IsEqual方法。AlternateExpression跟Expression是否相等只需要通过比较两个子表达式是否相等即可,于是代码就变成了:
1
bool
Apply(AlternateExpression
*
expression, Expression
*
target)
2 {
3 AlternateExpression * expected = dynamic_cast < AlternateExpression *> (target);
4 if (expected)
5 {
6 if ( ! Invoke(expression -> left, expected -> left.Obj())) return false ;
7 if ( ! Invoke(expression -> right, expected -> right.Obj())) return false ;
8 return true ;
9 }
10 return false ;
11 }
2 {
3 AlternateExpression * expected = dynamic_cast < AlternateExpression *> (target);
4 if (expected)
5 {
6 if ( ! Invoke(expression -> left, expected -> left.Obj())) return false ;
7 if ( ! Invoke(expression -> right, expected -> right.Obj())) return false ;
8 return true ;
9 }
10 return false ;
11 }
如果将AlternateExpression* expression变为AlternateExpression* const this,将Invoke(a, b)换成a->IsEqual(b),就跟直接写IsEqual虚函数没两样了。于是自己调用自己还是很方便的。但是我们最终还是想做成a->IsEqual(b)的,于是还要在Expression基类中写一点:
1
bool
Expression::IsEqual(vl::regex_internal::Expression
*
expression)
2 {
3 IsEqualAlgorithm algorithm;
4 return algorithm.Invoke( this , expression);
5 }
2 {
3 IsEqualAlgorithm algorithm;
4 return algorithm.Invoke( this , expression);
5 }
于是IsEqual的实现就结束了。虽然直接使用Visitor会很麻烦,但是我们可以通过稍微改造一下让它更好用。当然这里跟标准的Visitor模式其实还不是完全一致,细节问题就不详细讨论了。至少文章的目标“不通过修改Expression的接口而添加新功能”已经实现了。就结果来讲,添加一个新的功能还是很方便的。