1
#include
<
iostream
>
2
#include
"
stdio.h
"
3
using
namespace
std;
4
#include
<
String
>
5
#include
<
Stack
>
6
#include
<
set
>
7
#include
<
queue
>
8
#include
<
fstream
>
9
#include
<
iomanip
>
10
struct
path
11
{
12
int
from;
//
一个path代表从一个状态到另一个状态的路径
13
int
to;
14
string
strPath;
//
strPath代表这个结构体处理的路径
15
};
16
struct
dealStruct
17
{
18
int
stCount;
19
struct
path
*
resPtr;
20
};
21
struct
setNode
22
{
23
set
<
int
>
*
Dset;
24
int
num;
25
struct
setNode
*
next;
26
};
27
bool
judgeLD(
char
v )
//
判断正则表达式中是否含有非法字符
28
{
29
if
( (
int
(v)
>
47
&&
int
(v)
<=
57
)
||
30
(
int
(v)
>=
65
&&
int
(v)
<=
90
)
||
31
(
int
(v)
>=
97
&&
int
(v)
<=
122
)
||
32
v
==
'
>
'
||
v
==
'
<
'
||
v
==
'
=
'
||
33
v
==
'
`
'
||
v
==
'
+
'
||
v
==
'
-
'
)
34
return
true
;
35
else
36
return
false
;
37
}
38
struct
dealStruct
*
dealWithStr(
string
,
int
);
//
通过正则表达式生成NFA
39
int
jihe(
int
, stack
<
struct
path
*>
&
);
//
处理正则表达式时'*’的运算函数
40
int
lianjie(
int
, stack
<
struct
path
*>
&
);
//
处理正则表达式时'&’(自定义运算符ab相当于a&b)的运算函数
41
int
huo(
int
, stack
<
struct
path
*>
&
);;
//
处理正则表达式时'|’的运算函数
42
int
reduceNFA(
int
,
int
*
,
int
);
//
NFA->DFA函数
43
void
BFS(
int
,
int
);
//
计算一个状态的ξ闭包
44
void
produceC(
int
,
string
*
);
//
生成词法分析器的.cpp文件
45
int
arrForBFS[
100
]
=
{
0
};
//
存放BFS函数生成的闭包的数组
46
string
T[
2000
][
2000
];
//
NFA的数组表示
47
string
recT[
2000
][
2000
];
//
DFA的数组表示
48
int
stateCountDFA;
//
DFA状态数量
49
int
reduceState[
2
][
2000
];
//
化简DFA用的数组
50
set
<
char
>
tv;
//
所有终结符
51
set
<
char
>
::iterator Vt;
52
void
reduceDFA();
//
化简DFA
53
void
combine(
int
,
int
);
//
化简DFA时合并两个状态
54
int
regCount;
//
输入正则表达式的个数
55
56
int
main()
57
{
58
regCount
=
0
;
59
int
i,j,k;
60
int
temp
=
0
;
61
62
string
*
regArr;
63
string
*
typeName;
64
int
*
typeIndex;
65
int
**
resultEnterExt;
66
67
68
ifstream inRegular;
69
inRegular.open(
"
RegularExpressions.txt
"
);
70
inRegular
>>
regCount;
//
正则表达式个数
71
typeName
=
new
string
[regCount];
72
typeIndex
=
new
int
[regCount];
73
resultEnterExt
=
new
int
*
[regCount];
//
定义一个二位动态数组用于存放每个正则表达式所生成的NFA的起始状态和终结状态
74
for
( i
=
0
;i
<
regCount;i
++
)
75
resultEnterExt[i]
=
new
int
[
2
];
76
regArr
=
new
string
[regCount];
77
for
( i
=
0
;i
<
regCount;i
++
)
78
{
79
inRegular
>>
typeName[i];
80
inRegular
>>
regArr[i];
81
}
//
读入正则表达式
82
83
int
stateCount
=
-
1
;
84
for
( i
=
0
;i
<
regCount;i
++
)
//
遍历每个正则表达式
85
{
86
for
( j
=
0
;j
<
regArr[i].length()
-
1
;j
++
)
87
if
( ( judgeLD(regArr[i].at(j) )
&&
( judgeLD(regArr[i].at(j
+
1
))
||
regArr[i].at(j
+
1
)
==
'
(
'
) )
||
88
( regArr[i].at(j)
==
'
)
'
&&
( regArr[i].at(j
+
1
)
==
'
(
'
||
judgeLD(regArr[i].at(j
+
1
)) ) )
||
89
( regArr[i].at(j)
==
'
*
'
&&
( regArr[i].at(j
+
1
)
==
'
(
'
||
judgeLD(regArr[i].at(j
+
1
)) ) ) )
90
regArr[i].insert(j
+
1
,
"
&
"
);
//
预处理正则表达式,添加自定义运算符'&'
91
92
93
regArr[i].insert(regArr[i].length(),
"
#%
"
);
//
在末尾添加结束符
94
cout
<<
regArr[i]
<<
endl;
//
输出处理后的正则表达式
95
96
97
j
=
0
;
98
struct
path
*
p
=
NULL;
99
struct
dealStruct
*
result
=
dealWithStr( regArr[i],stateCount );
//
对于每个正则表达式调用NFA生成函数
100
101
stateCount
=
result
->
stCount;
102
resultEnterExt[i][
0
]
=
result
->
resPtr
->
from;
//
记录该正则表达式的起始状态和终结状态
103
resultEnterExt[i][
1
]
=
result
->
resPtr
->
to;
104
typeIndex[i]
=
resultEnterExt[i][
1
];
105
/*
cout<<"The number of states now is "<<stateCount<<endl;
106
cout<<"The regular perform dealt with just now is "<<result->resPtr->strPath<<endl;
107
cout<<"The from dealt with just now is "<<resultEnterExt[i][0]<<endl;
108
cout<<"The to dealt with just now is "<<resultEnterExt[i][1]<<endl;
*/
109
}
110
111
stateCount
++
;
//
汇总所有正则表达式的NFA
112
cout
<<
"
NFA合并完毕!
"
<<
endl;
113
114
for
( i
=
0
;i
<
regCount;i
++
)
115
T[stateCount][resultEnterExt[i][
0
]]
=
'
`
'
;
//
从公共起始状态出发
116
117
/*
for( i = 0;i <= stateCount;i++ ) //输出产生的NFA以检验
118
{
119
for( j = 0;j <= stateCount;j++ )
120
{
121
if( T[i][j] == "" ) cout<<" |";
122
else cout<<T[i][j]<<'|';
123
}
124
cout<<endl;
125
}
*/
126
127
stateCountDFA
=
reduceNFA( stateCount, typeIndex, regCount );
//
调用NFA->DFA函数生成DFA
128
reduceDFA();
129
130
produceC( stateCountDFA, typeName );
//
产生.cpp代码
131
132
system(
"
pause
"
);
133
134
return
0
;
135
}
136
137
/*
以下为NFA的生成函数,采用三个栈实现Thompson算法,第一个栈用于读入字符和运算符
138
第二个栈用于逆序,第三个栈专门用于处理一对括号内部的表达式,其实用两个栈就可以
139
实现,及把第三个栈舍弃,直接把括号的计算放在第一个栈中进行也可。为了计算更加
140
清楚不易出错故而多加了一个栈*
141
算法思路:读入正则表达式放入栈A中,*,&,|的优先级分别定为3,2,1,而表达式末
142
尾的'#'优先级最低,看做0。用一个三元数组存放当各个运算符的个数,字符均放入栈A中,
143
读到运算符时,当栈外的运算符的优先级大于等于站内所有运算符时,该运算符进栈,
144
如果低于所有站内运算符,计算站内所有运算符表的运算,直到栈内没有比栈外更高优先
145
级的运算符。
146
读到(的时候,(进栈,之后不论读到什么均进栈直到读到第一个),将这对()中的字符串
147
提出,用栈B逆序后放入栈C,在栈C中按上述方式处理直到只有一个元素,将元素返回栈A
148
栈中存储的是指向path结构体的指针
*/
149
struct
dealStruct
*
dealWithStr(
string
str,
int
stateCountV )
150
{
151
int
j
=
0
;
int
temp
=
0
;
152
struct
path
*
p
=
NULL;
153
int
currentPri[
3
]
=
{
0
};
//
用于储存栈中三种运算符(*,&,|)的个数,便于计算
154
stack
<
struct
path
*>
stackA;
155
while
( str.at(j)
!=
'
%
'
)
156
{
157
if
( judgeLD(str.at(j)) )
//
如果是字符,放入栈A中
158
{
159
tv.insert(str.at(j));
//
放入tv中
160
stateCountV
++
;
161
temp
=
stateCountV;
162
stateCountV
++
;
163
T[temp][stateCountV]
+=
str.at(j);
164
p
=
new
path();
165
p
->
from
=
temp;
166
p
->
to
=
stateCountV;
167
p
->
strPath
+=
str.at(j);
168
stackA.push(p);
169
//
cout<<"push a new letter "<<p->strPath<<endl;
170
j
++
;
171
}
172
else
if
( str.at(j)
==
'
*
'
)
//
如果是'*'由于没有比它优先级更高的了,
173
{
//
故必然是入栈
174
currentPri[
0
]
++
;
175
p
=
new
path();
176
p
->
from
=
-
1
;
177
p
->
to
=
-
1
;
178
p
->
strPath
+=
"
*
"
;
179
stackA.push(p);
180
//
cout<<"push *"<<endl;
181
j
++
;
182
}
183
else
if
( str.at(j)
==
'
&
'
)
//
如果是'&',检查之前有没有'*'
184
{
185
if
( currentPri[
0
]
==
0
)
//
没有的话,入栈
186
currentPri[
1
]
++
;
187
else
188
{
189
currentPri[
0
]
--
;
//
有,计算*
190
currentPri[
1
]
++
;
191
192
stateCountV
=
jihe( stateCountV, stackA );
193
}
194
p
=
new
path();
195
p
->
from
=
-
2
;
196
p
->
to
=
-
2
;
197
p
->
strPath
=
"
&
"
;
198
stackA.push(p);
//
不论有没有'*','&'必然会入栈
199
//
cout<<"push &"<<endl;
200
j
++
;
201
}
202
else
if
( str.at(j)
==
'
|
'
)
//
如果是'|',查看前面有没有*,&
203
{
204
while
( currentPri[
0
]
>
0
||
currentPri[
1
]
>
0
)
//
一直算直到没有
205
{
206
if
( currentPri[
0
]
>
0
)
207
{
208
currentPri[
0
]
--
;
209
stateCountV
=
jihe( stateCountV, stackA );
210
}
211
else
212
{
213
currentPri[
1
]
--
;
214
stateCountV
=
lianjie( stateCountV, stackA );
215
}
216
}
217
currentPri[
2
]
++
;
//
不论有没有*,&,最后'|'必然还是要入栈
218
p
=
new
path();
219
p
->
from
=
-
3
;
220
p
->
to
=
-
3
;
221
p
->
strPath
=
"
|
"
;
222
stackA.push(p);
223
//
cout<<"push |"<<endl;
224
j
++
;
225
}
226
else
if
( str.at(j)
==
'
#
'
)
//
如果是'#',则计算前面所有的运算
227
{
228
while
( currentPri[
0
]
>
0
||
currentPri[
1
]
>
0
||
currentPri[
2
]
>
0
)
229
{
230
if
( currentPri[
0
]
>
0
)
231
{
232
currentPri[
0
]
--
;
233
stateCountV
=
jihe( stateCountV, stackA );
234
}
235
else
if
( currentPri[
1
]
>
0
)
236
{
237
currentPri[
1
]
--
;
238
stateCountV
=
lianjie( stateCountV, stackA );
239
}
240
else
241
{
242
currentPri[
2
]
--
;
243
stateCountV
=
huo( stateCountV, stackA );
244
}
245
246
}
247
//
cout<<"The End, there is one struct path in stackA: "<<stackA.size()<<endl;
248
j
++
;
249
//
cout<<"It is "<<(stackA.top())->strPath<<endl;
250
}
251
else
if
( str.at(j)
==
'
(
'
)
//
发现左括号,进入括号处理过程
252
{
253
//
cout<<"发现一个最外层括号"<<endl;
254
int
parenthCount
=
1
;
255
bool
flag
=
true
;
256
while
( parenthCount
>
0
)
257
{
258
if
( str.at(j)
==
'
(
'
)
259
{
260
p
=
new
path();
261
p
->
from
=
-
5
;
262
p
->
to
=
-
5
;
263
p
->
strPath
+=
"
(
"
;
264
stackA.push(p);
265
//
cout<<"放入一左个括号"<<endl;
266
if
(
!
flag )parenthCount
++
;
267
flag
=
false
;
268
}
269
else
if
( str.at(j)
==
'
)
'
)
//
发现第一个右括号
270
{
271
//
cout<<"发现一个内层右括号"<<endl;
272
parenthCount
--
;
273
274
stack
<
struct
path
*>
stackB;
//
定义另两个栈
275
stack
<
struct
path
*>
stackC;
276
277
p
=
new
path();
278
p
->
from
=
-
6
;
279
p
->
to
=
-
6
;
280
p
->
strPath
+=
"
#
"
;
281
stackB.push(p);
282
while
( (stackA.top())
->
from
!=
-
5
)
//
栈Apop到第一个(
283
{
284
stackB.push( stackA.top() );
//
将栈Apop出的放入B
285
//
cout<<"stackA弹出一个"<<stackA.top()->strPath<<endl;
286
stackA.pop();
287
288
}
289
//
cout<<"stackA想弹出一个左小括号"<<stackA.top()->strPath<<endl;
290
stackA.pop();
//
去掉左小括号
291
292
int
pri[
3
]
=
{
0
};
293
while
(
!
stackB.empty() )
//
开始处理栈B内的表达式
294
{
295
p
=
stackB.top();
296
stackB.pop();
297
//
cout<<"stackB弹出一个"<<p->from<<endl;
298
if
( p
->
from
>=
0
)
299
stackC.push(p);
//
用栈C处理栈B的表达式
300
else
if
( p
->
from
==
-
1
)
301
{
302
pri[
0
]
++
;
303
stackC.push(p);
304
}
305
else
if
( p
->
from
==
-
2
)
306
{
307
if
( pri[
0
]
>
0
)
308
{
309
pri[
0
]
--
;
310
stateCountV
=
jihe( stateCountV, stackC );
311
}
312
313
stackC.push(p);
314
pri[
1
]
++
;
315
}
316
else
if
( p
->
from
==
-
3
)
317
{
318
while
( pri[
0
]
>
0
||
pri[
1
]
>
0
)
319
{
320
if
( pri[
0
]
>
0
)
321
{
322
pri[
0
]
--
;
323
stateCountV
=
jihe( stateCountV, stackC );
324
}
325
else
326
{
327
pri[
1
]
--
;
328
stateCountV
=
lianjie( stateCountV, stackC );
329
}
330
331
}
332
pri[
2
]
++
;
333
stackC.push(p);
334
}
335
else
if
( p
->
from
==
-
6
)
336
{
337
while
( pri[
0
]
>
0
||
pri[
1
]
>
0
||
pri[
2
]
>
0
)
338
{
339
if
( pri[
0
]
>
0
)
340
{
341
pri[
0
]
--
;
342
stateCountV
=
jihe( stateCountV, stackC );
343
}
344
else
if
( pri[
1
]
>
0
)
345
{
346
pri[
1
]
--
;
347
stateCountV
=
lianjie( stateCountV, stackC );
348
}
349
else
350
{
351
pri[
2
]
--
;
352
stateCountV
=
huo( stateCountV, stackC );
353
}
354
}
355
}
356
else
357
cout
<<
"
括号内含非法字符!
"
<<
endl;
358
}
359
//
cout<<"stackC 中此时应该有1个东东"<<stackC.size()<<"它是"<<stackC.top()->strPath<<endl;
360
361
//
cout<<"此时的parenthCount为"<<parenthCount<<endl;
362
stackA.push(stackC.top());
//
处理完成,栈C弹出结果
363
stackC.pop();
//
放入栈A
364
}
365
else
if
( str.at(j)
==
'
*
'
)
366
{
367
p
=
new
path();
368
p
->
from
=
-
1
;
369
p
->
to
=
-
1
;
370
p
->
strPath
+=
"
*
"
;
371
stackA.push(p);
372
}
373
else
if
( str.at(j)
==
'
&
'
)
374
{
375
p
=
new
path();
376
p
->
from
=
-
2
;
377
p
->
to
=
-
2
;
378
p
->
strPath
+=
"
&
"
;
379
stackA.push(p);
380
}
381
else
if
( str.at(j)
==
'
|
'
)
382
{
383
p
=
new
path();
384
p
->
from
=
-
3
;
385
p
->
to
=
-
3
;
386
p
->
strPath
+=
"
|
"
;
387
stackA.push(p);
388
}
389
else
390
{
391
stateCountV
++
;
392
temp
=
stateCountV;
393
stateCountV
++
;
394
T[temp][stateCountV]
+=
str.at(j);
395
p
=
new
path();
396
p
->
from
=
temp;
397
p
->
to
=
stateCountV;
398
p
->
strPath
+=
str.at(j);
399
stackA.push(p);
400
}
401
tv.insert(str.at(j));
//
放入tv中
402
j
++
;
403
}
404
}
405
else
406
cout
<<
"
含有非法字符!
"
<<
endl;
407
}
408
409
struct
dealStruct
*
res
=
new
dealStruct();
410
res
->
stCount
=
stateCountV;
411
res
->
resPtr
=
stackA.top();
412
stackA.pop();
413
cout
<<
"
NFA生成完毕!
"
<<
endl;
414
return
res;
415
}
416
417
418
int
jihe(
int
stateV, stack
<
struct
path
*>
&
stackA )
419
{
420
struct
path
*
b
=
stackA.top();stackA.pop();
421
struct
path
*
a
=
stackA.top();stackA.pop();
422
//
cout<<"pop the value for * "<<a->strPath<<endl;
423
//
cout<<"pop the sign for * "<<b->strPath<<endl;
424
425
stateV
++
;
426
int
temp
=
stateV;
427
stateV
++
;
428
T[temp][a
->
from]
+=
"
`
"
;
429
T[temp][stateV]
+=
"
`
"
;
430
T[a
->
to][stateV]
+=
"
`
"
;
431
T[a
->
to][a
->
from]
+=
"
`
"
;
432
433
struct
path
*
p
=
new
path();
434
p
->
from
=
temp;
435
p
->
to
=
stateV;
436
p
->
strPath
=
"
(
"
+
a
->
strPath
+
"
)*
"
;
437
stackA.push(p);
438
//
cout<<"push a new one of \""<<p->strPath<<"\""<<endl;
439
440
delete a;delete b;
441
return
stateV;
442
}
443
int
lianjie(
int
stateV, stack
<
struct
path
*>
&
stackV )
444
{
445
struct
path
*
a
=
stackV.top();stackV.pop();
446
//
cout<<"pop the second value for & "<<a->strPath<<endl;
447
struct
path
*
b
=
stackV.top();stackV.pop();
448
//
cout<<"pop the sign for & "<<b->strPath<<endl;
449
struct
path
*
c
=
stackV.top();stackV.pop();
450
//
cout<<"pop the first value for & "<<c->strPath<<endl;
451
T[c
->
to][a
->
from]
+=
"
`
"
;
452
453
struct
path
*
p
=
new
path();
454
p
->
from
=
c
->
from;
455
p
->
to
=
a
->
to;
456
p
->
strPath
=
"
(
"
+
c
->
strPath
+
"
)&(
"
+
a
->
strPath
+
"
)
"
;
457
stackV.push(p);
458
//
cout<<"push a new one of \""<<p->strPath<<"\""<<endl;
459
delete a;delete b;delete c;
460
461
return
stateV;
462
}
463
int
huo(
int
stateV, stack
<
struct
path
*>
&
stackA )
464
{
465
struct
path
*
a
=
stackA.top();stackA.pop();
466
struct
path
*
b
=
stackA.top();stackA.pop();
467
struct
path
*
c
=
stackA.top();stackA.pop();
468
//
cout<<"pop the first value for | "<<c->strPath<<endl;
469
//
cout<<"pop the second value for | "<<a->strPath<<endl;
470
//
cout<<"pop the sign for | "<<b->strPath<<endl;
471
472
stateV
++
;
473
int
temp
=
stateV;
474
stateV
++
;
475
T[temp][a
->
from]
+=
"
`
"
;
476
T[temp][c
->
from]
+=
"
`
"
;
477
T[a
->
to][stateV]
+=
"
`
"
;
478
T[c
->
to][stateV]
+=
"
`
"
;
479
480
struct
path
*
p
=
new
path();
481
p
->
from
=
temp;
482
p
->
to
=
stateV;
483
p
->
strPath
=
"
(
"
+
c
->
strPath
+
"
)|(
"
+
a
->
strPath
+
"
)
"
;
484
stackA.push(p);
485
//
cout<<"push a new one of \""<<p->strPath<<"\""<<endl;
486
delete a;delete b;delete c;
487
488
return
stateV;
489
}
490
491
int
reduceNFA(
int
stateCountV,
int
*
typeIndexV,
int
typeNumV )
//
stateCountV为NFA的起始状态
492
{
493
int
i,j,k;
494
struct
setNode
*
head,
*
setPtr,
*
end;
//
用于构造链表的指针,新状态用链表存储
495
struct
setNode
*
currentPtr;
//
用于遍历链表中元素的指针
496
497
set
<
int
>
::iterator ite_int;
//
每个新的状态为一个set,封装到setNode中
498
int
countNode
=
1
;
499
setPtr
=
new
setNode();
500
setPtr
->
num
=
1
;
501
setPtr
->
Dset
=
new
set
<
int
>
();
502
setPtr
->
next
=
NULL;
503
BFS(stateCountV, stateCountV);
504
for
( i
=
0
;arrForBFS[i]
>=
0
;i
++
)
//
先把起始状态的ε闭包求出作为新起始状态
505
setPtr
->
Dset
->
insert(arrForBFS[i]);
506
507
head
=
setPtr;
508
end
=
setPtr;
509
currentPtr
=
head;
510
511
string
letter1[
80
];
//
这里定义了两个数组,一个存放读入的字符
512
set
<
int
>
letter2[
80
];
//
一个存放读入相应字符后所到达的状态集合
513
int
up
=
0
;
514
515
516
while
( currentPtr
!=
NULL )
//
遍历新状态链表
517
{
518
//
cout<<"currentPtr遍历到"<<currentPtr->num<<endl;
519
//
下面一个双重循环,第一个循环表示遍历新状态集合的每一个状态
520
//
第二个循环用于从原NFA找从这个状态出发到达的所有状态
521
for
( ite_int
=
currentPtr
->
Dset
->
begin(); ite_int
!=
currentPtr
->
Dset
->
end();
++
ite_int )
522
for
( i
=
0
;i
<=
stateCountV;i
++
)
523
if
( T[
*
ite_int][i]
!=
"
`
"
&&
T[
*
ite_int][i]
!=
""
)
524
{
525
bool
flag2
=
0
;
526
for
( j
=
0
;j
<
up;j
++
)
527
if
( letter1[j]
==
T[
*
ite_int][i] )
528
{
529
flag2
=
1
;
530
break
;
531
}
532
if
(
!
flag2 )
533
{
534
up
++
;
535
letter1[up
-
1
]
=
T[
*
ite_int][i];
//
读入的字符都记录下
536
}
537
BFS( i, stateCountV );
538
for
( k
=
0
;arrForBFS[k]
>=
0
;k
++
)
539
letter2[j].insert(arrForBFS[k]);
//
所到的状态也记下
540
}
541
542
/*
cout<<"从新的状态"<<currentPtr->num<<"生成的矩阵是"<<endl;
543
for( i = 0;i < up;i++ )
544
{
545
cout<<letter1[i]<<" ";
546
for( ite_int = letter2[i].begin(); ite_int != letter2[i].end(); ++ite_int )
547
cout<<*ite_int<<" ";
548
cout<<endl;
549
}//最后letter1,letter2记录了这个新状态读入不同的字符后会到达的状态合集
*/
550
551
552
for
( i
=
0
;i
<
up;i
++
)
//
下面开始比较这些状态是不是以前有过的
553
{
554
struct
setNode
*
p
=
head;
555
while
( p
!=
NULL )
556
{
557
if
( letter2[i]
==
*
(p
->
Dset) )
558
break
;
559
p
=
p
->
next;
560
}
561
if
( p
!=
NULL )
//
有这个状态,只要加一条路径即可
562
recT[currentPtr
->
num][p
->
num]
+=
letter1[i];
563
else
//
没有这个状态,要在链表末加一个节点,并在recT中加路径
564
{
565
countNode
++
;
566
recT[currentPtr
->
num][countNode]
+=
letter1[i];
567
setPtr
=
new
setNode();
568
setPtr
->
num
=
countNode;
569
setPtr
->
Dset
=
new
set
<
int
>
();
570
for
( ite_int
=
letter2[i].begin(); ite_int
!=
letter2[i].end();
++
ite_int )
571
setPtr
->
Dset
->
insert(
*
ite_int );
572
setPtr
->
next
=
NULL;
573
end
->
next
=
setPtr;
574
end
=
setPtr;
575
576
for
( k
=
0
;k
<
typeNumV;k
++
)
577
{
578
ite_int
=
setPtr
->
Dset
->
find(typeIndexV[k]);
579
if
( ite_int
!=
setPtr
->
Dset
->
end()
||
*
(setPtr
->
Dset
->
end())
==
stateCountV )
580
{
581
char
temp[
10
];
582
string
str
=
itoa(k,temp,
10
);
583
recT[setPtr
->
num][
0
]
=
"
-
"
+
str;
//
如果是终止状态,做记号
584
recT[
0
][setPtr
->
num]
=
"
-
"
+
str;
585
}
586
}
587
}
588
}
589
for
( i
=
0
;i
<
up;i
++
)
//
清空letter1,letter2,用于下一轮遍历
590
{
591
letter1[i]
=
""
;
592
letter2[i].clear();
593
}
594
up
=
0
;
595
currentPtr
=
currentPtr
->
next;
596
597
/*
cout<<"此时的链表为"<<endl;
598
struct setNode *p2 = head;
599
while( p2 != NULL )
600
{
601
cout<<p2->num<<" ";
602
p2 = p2->next;
603
}
604
cout<<endl;
*/
605
}
606
607
cout
<<
"
DFA生成完毕!
"
<<
endl;
608
/*
cout<<"最后DFA为:"<<endl;
609
for( i = 0;i <= countNode;i++,cout<<endl )
610
for( j = 0; j <= countNode;j++ )
611
{
612
if( recT[i][j] == "" )cout<<" |";
613
else
614
cout<<setw(2)<<recT[i][j]<<"|";
615
}
*/
616
return
countNode;
617
618
}
619
620
void
BFS(
int
row,
int
stateCountV )
//
广度搜索法求一个状态的ε闭包
621
{
622
for
(
int
i
=
0
;i
<
100
;i
++
)
623
arrForBFS[i]
=
-
1
;
624
int
ite
=
-
1
;
625
queue
<
int
>
que;
626
que.push(row);
627
ite
++
;
628
arrForBFS[ite]
=
row;
629
while
(
!
que.empty() )
630
{
631
int
t
=
que.front();
632
que.pop();
633
for
(
int
i
=
0
;i
<=
stateCountV;i
++
)
634
if
( T[t][i]
==
"
`
"
)
635
{
636
ite
++
;
637
arrForBFS[ite]
=
i;
638
que.push(i);
639
}
640
}
641
}
642
643
void
reduceDFA()
//
化简DFA算法
644
{
645
/*
646
算法思想:
647
构造二维数组reduceState[2][stateCountDFA]
648
矩阵第一行存储各状态当前所属集合标记
649
矩阵第二行存储个状态读入某个字符后的目标状态所属集合标记
650
每读入某个字符后,矩阵两行相对应位置都相等的状态为同一集合
651
第一行相等而第二行不相等的两状态分到不同集合,即将后一个状态的所属集合标记+1
652
全部字符读完后,扫描矩阵的第一行,即当前所属集合标记,相等的状态合并
653
简化完成
654
*/
655
int
pos
=
1
;
//
非终结状态所属集合标记
656
int
poe;
//
终结状态所属集合标记
657
for
(
int
i
=
1
;i
<=
stateCountDFA;i
++
)
//
该循环用于给所有状态的所属集合赋初值,终结状态为stateCountDFA,非终结状态为1.
658
{
659
if
(recT[
0
][i].find(
'
-
'
)
!=
string
::npos)
660
{
661
poe
=
stateCountDFA
+
atoi(
&
recT[
0
][i].at(
1
));
//
将不同正则表达式的终结状态标注为不同集合:stateCountDFA+正则表达式序号
662
reduceState[
0
][i]
=
poe;
663
reduceState[
1
][i]
=
poe;
664
}
665
else
666
{
667
reduceState[
0
][i]
=
pos;
668
reduceState[
1
][i]
=
pos;
669
}
670
}
671
poe
=
stateCountDFA
+
regCount
+
1
;
672
bool
flag;
//
bool标记,读每个字符,调整集合状态后,重新读入该字符,直到不发生集合改变为止
673
for
(Vt
=
tv.begin();Vt
!=
tv.end();Vt
++
)
//
读入每个字符的循环
674
{
675
//
cout<<*Vt;
676
flag
=
true
;
677
while
(flag)
//
该循环即为重新读入当前字符的循环
678
{
679
flag
=
false
;
680
bool
item
=
true
;
//
用于标记某状态读入当前字符是否产生状态变化
681
for
(
int
j
=
1
;j
<=
stateCountDFA;j
++
)
682
{
683
item
=
false
;
684
for
(
int
k
=
1
;k
<=
stateCountDFA;k
++
)
//
循环遍历DFA
685
{
686
if
(recT[j][k].find(
*
Vt)
!=
string
::npos)
//
若当前状态读入该字符产生状态变化
687
{
688
item
=
true
;
689
reduceState[
1
][j]
=
reduceState[
0
][k];
//
目标集合赋值为目标状态所属集合
690
}
691
if
(
!
item)
//
不产生状态变化
692
reduceState[
1
][j]
=
0
;
//
目标集合标记为0
693
}
694
695
}
696
bool
b1
=
false
;
//
标记用于判断pos是否加1
697
bool
b2
=
false
;
//
标记用于判断poe是否加1
698
for
(
int
p
=
1
;p
<=
stateCountDFA;p
++
)
//
循环扫描reduceState矩阵.第一个标记位置
699
{
700
b1
=
false
;b2
=
false
;
701
for
(
int
q
=
2
;q
<=
stateCountDFA;q
++
)
//
循环扫描reduceState矩阵.第二个标记位置
702
{
703
if
(reduceState[
0
][p]
==
reduceState[
0
][q]
&&
reduceState[
1
][p]
!=
reduceState[
1
][q])
704
{
705
//
如果存在所属集合相等,目标集合不等的情况,将所属集合分开
706
if
(reduceState[
0
][q]
<
stateCountDFA)
//
当前状态为非终结状态
707
{
708
b1
=
true
;
709
reduceState[
0
][q]
=
pos;
710
}
711
else
//
当前状态为终结状态
712
{
713
b2
=
true
;
714
reduceState[
0
][q]
=
poe;
715
}
716
flag
=
true
;
//
发生集合调整,标记为true
717
}
718
}
719
if
(b1)
720
pos
++
;
721
if
(b2)
722
poe
++
;
723
}
724
}
725
}
726
727
//
for(int i1=0;i1<stateCountDFA;i1++)
728
//
cout<<reduceState[0][i1]<<" ";
729
//
cout<<endl;
730
//
for(i1=0;i1<stateCountDFA;i1++)
731
//
cout<<reduceState[1][i1]<<" ";
732
733
for
(
int
j
=
1
;j
<
stateCountDFA;j
++
)
//
循环扫描reduceState矩阵第一行.第一个标记位置
734
{
735
if
(reduceState[
0
][j]
!=
0
)
736
{
737
for
(
int
k
=
j
+
1
;k
<=
stateCountDFA;k
++
)
//
循环扫描reduceState矩阵第一行.第二个标记位置
738
{
739
if
(reduceState[
0
][j]
==
reduceState[
0
][k])
//
两个标记位置值相等,即所属集合相同
740
combine(j,k);
//
调用合并函数
741
}
742
}
743
}
744
/*
cout<<"化简后的DFA为:"<<endl;
745
for(int m = 0;m <= stateCountDFA;m++,cout<<endl )
746
for(int n = 0; n <= stateCountDFA;n++ )
747
{
748
if( recT[m][n] == "" )cout<<" |";
749
else
750
cout<<recT[m][n]<<"|";
751
}
*/
752
753
cout
<<
"
DFA简化完毕!
"
<<
endl;
754
}
755
756
void
combine(
int
j,
int
k)
757
{
758
/*
759
函数用于合并两个状态
760
把第K状态的相应的行和列的值加到第j状态上
761
并把k状态所有的值置空
762
*/
763
for
(
int
i
=
1
;i
<=
stateCountDFA;i
++
)
764
{
765
recT[j][i]
+=
recT[k][i];
766
recT[i][j]
+=
recT[i][k];
767
}
768
for
(i
=
0
;i
<=
stateCountDFA;i
++
)
769
{
770
recT[k][i]
=
""
;
771
recT[i][k]
=
""
;
772
}
773
reduceState[
0
][k]
=
0
;
774
}
775
776
void
produceC(
int
nodeCountV,
string
*
typeNameV )
//
产生.cpp文件
777
{
778
int
i, j ,k;
779
char
buf[
1024
];
//
临时保存读取出来的文件内容
780
string
message
=
""
;
781
string
remainCode
=
""
;
782
ifstream
in
;
783
in
.open(
"
Remain.txt
"
);
784
if
(
in
.is_open())
//
文件打开成功,说明曾经写入过东西
785
{
786
while
(
in
.good()
&&
!
in
.eof())
787
{
788
memset(buf,
0
,
1024
);
789
in
.getline(buf,
1204
);
790
message
=
buf;
791
message
+=
"
\n
"
;
792
remainCode
+=
message;
793
}
794
in
.close();
795
}
796
797
ofstream fout(
"
scanner.cpp
"
,ios::
out
);
798
fout
<<
remainCode;
799
fout
<<
"
while(flag)\n
"
<<
"
{\n
"
;
800
fout
<<
"
switch(state)\n
"
<<
"
{\n
"
;
801
for
( i
=
1
;i
<=
nodeCountV;i
++
)
//
遍历每个新状态
802
{
803
fout
<<
"
case
"
<<
i
<<
"
:\n
"
;
804
fout
<<
"
last++;\n
"
;
805
fout
<<
"
c = code[last];\n
"
;
806
if
( i
==
1
)
807
{
808
fout
<<
"
if( c == '%' )\n
"
;
//
读到文件末尾,终结
809
fout
<<
"
{\n
"
;
810
fout
<<
"
flag = 0;\n
"
;
811
fout
<<
"
break;\n}\n
"
;
812
}
813
bool
flag3
=
0
;
bool
flag2
=
0
;
814
for
( j
=
1
;j
<=
nodeCountV;j
++
)
815
if
( recT[i][j]
!=
"
'
"
&&
recT[i][j]
!=
""
)
816
{
817
flag3
=
1
;
818
if
( flag2 )
819
fout
<<
"
else
"
;
820
821
fout
<<
"
if( contain( \
""
<<recT[i][j]<<
"
\
"
,c ) )
"
;
822
fout
<<
"
state =
"
<<
j
<<
"
;\n
"
;
823
flag2
=
1
;
824
}
825
if
( flag3 )
826
fout
<<
"
else\n{\n
"
;
827
//
fout<<"{\n"<<"endState.find("<<i<<");\n";
828
//
fout<<"if( ite_int == endState.find && *(endState.end()) != "<<i<<")\n";
829
if
( recT[i][
0
][
0
]
==
'
-
'
)
830
{
831
string
tempIndexStr
=
recT[i][
0
].substr(
1
,recT[i][
0
].length()
-
1
);
832
int
tempIndex
=
atoi(tempIndexStr.c_str());
833
string
type
=
typeNameV[tempIndex];
834
fout
<<
"
flag2 = true;\n
"
;
835
fout
<<
"
for( k = 0;k < count;k++ )\n
"
;;
836
fout
<<
"
{\n
"
<<
"
if( SymbolTable[k][1] == code.substr(first,last-first) )\n
"
;
837
fout
<<
"
{\nflag2 = false;\nbreak;\n}\n}\n
"
;
838
fout
<<
"
if(flag2)\n{\n
"
;
839
fout
<<
"
count++;\n
"
;
840
fout
<<
"
SymbolTable[count-1][0] = \
""
<<type<<
"
\
"
;\n
"
;
841
fout
<<
"
SymbolTable[count-1][1] = code.substr(first,last-first);\n
"
;
842
fout
<<
"
}\n
"
;
843
844
fout
<<
"
cout<<code.substr(first,last-first)<<endl;\n
"
;
845
fout
<<
"
last--;\n
"
;
846
fout
<<
"
first = last+1;\n
"
;
847
fout
<<
"
state = 1;\n
"
;
848
}
849
else
850
{
851
fout
<<
"
cout<<\
"
编译错误
!
\\n\
"
;\n
"
;
852
fout
<<
"
flag = 0;\n
"
;
853
}
854
if
( flag3 )
855
fout
<<
"
}\n
"
;
856
fout
<<
"
break;\n
"
;
857
}
858
fout
<<
"
default:\n
"
<<
"
break;\n
"
<<
"
}\n}\n
"
;
859
860
fout
<<
"
cout<<\
"
Symbol 表如下:\
"
<<endl;\n
"
;
861
fout
<<
"
cout<<setw(3)<<\
"
序号\
"
<<setw(10)<<\
"
token类型\
"
<<\
"
\
"
<<\
"
token名字\
"
<<endl;\n
"
;
862
fout
<<
"
for( k2 = 0;k2 < count;k2++ )\n
"
;
863
fout
<<
"
{\n
"
<<
"
cout<<setw(3)<<k2<<setw(10)<<SymbolTable[k2][0]<<\
"
\
"
<<SymbolTable[k2][1]<<endl;\n
"
;
864
fout
<<
"
}\n
"
;
865
fout
<<
"
system(\
"
pause\
"
);\n
"
;
866
fout
<<
"
return 0;\n}\n
"
;
867
868
cout
<<
"
代码生成完毕!见scanner.cpp文件。
"
<<
endl;
869
}