1.NFA转DFA原理
https://blog.csdn.net/weixin_43655282/article/details/108963761
2.set集合运算参考
set取交集/并集:https://blog.csdn.net/u013095333/article/details/89322501
set取差集:https://blog.csdn.net/qq_34999565/article/details/115152282
// DFA description filled in by the NFA -> DFA conversion.
struct DFA {
    std::set<int> Q;      // DFA state ids
    std::set<char> alpha; // input alphabet
    int sigma[25][250];   // transition table, indexed [state][symbol]; -1 means "no transition"
    int start;            // id of the start state
    std::set<int> end;    // ids of the accepting states
};

// The single, file-level DFA instance the conversion and printing code work on.
DFA DFA_instance;
ε-closure(s)是从状态s出发,只经过ε边(代码中用'$'表示,可以经过零条或多条)所能到达的所有状态的集合,因此一定包含s本身。
思路:用bfs或dfs沿ε边遍历求解。
// Compute e_closure[s] for every NFA state s with an iterative depth-first
// walk over the '$' (epsilon) edges. A state always belongs to its own
// closure, so states without '$' edges end up with the closure {s}.
// NOTE(review): state ids index e_closure[] directly and are not range-checked
// here - confirm the NFA never uses ids outside the array.
for (const int state : NFA_instance.Q) {
    std::set<int>& closure = e_closure[state];
    closure.clear();  // start from scratch so a repeated run cannot see stale members

    std::stack<int> pending;  // states discovered but not yet expanded
    pending.push(state);
    while (!pending.empty()) {
        const int current = pending.top();
        pending.pop();

        // insert() reports whether the state was new. Skipping states that
        // were already expanded is what makes the walk terminate when the
        // epsilon edges form a cycle.
        if (!closure.insert(current).second) {
            continue;
        }
        for (const int next : NFA_instance.sigma[current]['$']) {
            pending.push(next);
        }
    }
}
思路:move(I,alpha)是指I集合中的每个状态s在读入非空输入符号alpha后能转移到的状态的集合。求出move(I,alpha)之后,再利用之前求好的ε-closure求出ε-closure(move(I,alpha)),它就是一个状态子集。
TIPS:这是非常重要的一步,也是最容易出错的一步,要谨慎考虑。(具体需要考虑的细节可以在代码中查看,大体的思路就是这样;建议读者看完这段思路后先自己思考如何实现,不要马上看代码。)
//求出第一个状态子集
//Dstates[0]对应NFA_instance.start的闭包
set_union(e_closure[NFA_instance.start].begin(), e_closure[NFA_instance.start].end(), temp.begin(), temp.end(), inserter(Dstates[0], Dstates[0].begin()));
int j = 0, p = 0;
//转换矩阵DFA_instance.sigma
for (int i = 0; i < 25; i++) {
for (int j = 0; j < 250; j++) {
DFA_instance.sigma[i][j] = -1;
}
}
//j表示当前处理的状态子集
//p表示总状态子集数
while (j <= p) {
for (auto alpha = DFA_instance.alpha.begin(); alpha != DFA_instance.alpha.end(); alpha++) {
set<int> temp_Dstate;
set<int> e_temp_Dstate;
//得到Dstates[j]有符号边的转换集合temp_Dstate
//Dstates[j]面对每一种符号输入的处理
for (auto q = Dstates[j].begin(); q != Dstates[j].end(); q++) {
if (!NFA_instance.sigma[*q][*alpha].empty()) {
for (auto q1 = NFA_instance.sigma[*q][*alpha].begin(); q1 != NFA_instance.sigma[*q][*alpha].end(); q1++) {
temp_Dstate.insert(*q1);
}
}
}
//test 测试符号闭包
//发现未清空temp_Dstate和e_temp_Dstate导致错误
//cout << "closure of " << j << " with alpha " << *alpha << " : ";
//for (auto q = temp_Dstate.begin(); q != temp_Dstate.end(); q++) {
// cout << *q << " ";
//}
//cout << endl;
//求出temp_Dstate转换集合的e闭包e_temp_Dstate
//转换集合temp_Dstate为空,一定没有产生新状态,产生的e_temp_Dstate是原本的Dstate[j]
//转换集合temp_Dstate不为空,可能产生新状态,需要将产生的新状态与之前的Dstates中记录的对比
if (!temp_Dstate.empty()) {
for (auto state = temp_Dstate.begin(); state != temp_Dstate.end(); state++) {
int index = *state;
set_union(e_closure[index].begin(), e_closure[index].end(), temp.begin(), temp.end(), inserter(e_temp_Dstate, e_temp_Dstate.begin()));
}
//flag 1表示产生新状态
// 0 未产生新状态
int flag = 1;
int index = -1;
//index用于未产生新状态时,标记当前符号输入下产生的状态对应哪个旧状态
for (int i = 0; i <= p; i++) {
set<int> diff;
set_difference(e_temp_Dstate.begin(), e_temp_Dstate.end(), Dstates[i].begin(), Dstates[i].end(), inserter(diff, diff.begin()));
if (diff.empty()) {
flag = 0;
index = i;
break;
}
}
if (flag == 1) {
p++;
set_union(e_temp_Dstate.begin(), e_temp_Dstate.end(), temp.begin(), temp.end(), inserter(Dstates[p], Dstates[p].begin()));
DFA_instance.sigma[j][*alpha] = p;
}
else {
DFA_instance.sigma[j][*alpha] = index;
}
}
else {
DFA_instance.sigma[j][*alpha] = -1;
}
}
//cout << "closure of " << j << " : ";
//for (auto q = Dstates[j].begin(); q != Dstates[j].end(); q++) {
// cout << *q << " ";
//}
//cout << endl;
j++;
}
// DFA description filled in by the NFA -> DFA conversion.
struct DFA {
    std::set<int> Q;      // DFA state ids
    std::set<char> alpha; // input alphabet
    int sigma[25][250];   // transition table, indexed [state][symbol]; -1 means "no transition"
    int start;            // id of the start state
    std::set<int> end;    // ids of the accepting states
};

// The single, file-level DFA instance the conversion and printing code work on.
DFA DFA_instance;
// File-level working storage for the NFA -> DFA conversion.

// Per-state tables, indexed by state id.
std::set<int> e_closure[25];  // e_closure[s]: epsilon-closure of NFA state s
std::set<int> Dstates[25];    // Dstates[i]: NFA states represented by DFA state i

// Work list for the closure traversal (a stack, so the walk is depth-first).
std::stack<int> bfs;

// Placeholder sets handed to set_union as its second operand.
std::set<int> temp;
std::set<char> temp_c;
void NFA2DFA() {
////test 初始化NFA
//for (int i = 0; i <= 9; i++) {
// NFA_instance.Q.insert(i);
//}
//NFA_instance.start = 0;
//NFA_instance.end.insert(9);
//NFA_instance.alpha.insert('$');
//NFA_instance.alpha.insert('a');
//NFA_instance.alpha.insert('b');
//NFA_instance.sigma[0]['$'].push_back(1);
//NFA_instance.sigma[0]['$'].push_back(7);
//NFA_instance.sigma[1]['$'].push_back(2);
//NFA_instance.sigma[1]['$'].push_back(4);
//NFA_instance.sigma[2]['a'].push_back(3);
//NFA_instance.sigma[3]['$'].push_back(6);
//NFA_instance.sigma[4]['b'].push_back(5);
//NFA_instance.sigma[5]['$'].push_back(6);
//NFA_instance.sigma[6]['$'].push_back(1);
//NFA_instance.sigma[6]['$'].push_back(7);
//NFA_instance.sigma[7]['a'].push_back(8);
//NFA_instance.sigma[8]['b'].push_back(9);
//DFA和NFA的alpha字符集合并
set_union(NFA_instance.alpha.begin(), NFA_instance.alpha.end(), temp_c.begin(), temp_c.end(), inserter(DFA_instance.alpha,DFA_instance.alpha.begin()));
//DFA中无$边
DFA_instance.alpha.erase('$');
//求出各状态的e-closure
for (auto q = NFA_instance.Q.begin(); q != NFA_instance.Q.end(); q++) {
if (!NFA_instance.sigma[*q]['$'].empty()) {
bfs.push(*q);
while (!bfs.empty()) {
int top = bfs.top();
bfs.pop();
e_closure[*q].insert(top);
if (!NFA_instance.sigma[top]['$'].empty()) {
for (auto q1 = NFA_instance.sigma[top]['$'].begin(); q1 != NFA_instance.sigma[top]['$'].end(); q1++) {
bfs.push(*q1);
}
}
}
}
else {
e_closure[*q].insert(*q);
}
}
//验证e-closure输出的正确性
//for (auto q = NFA_instance.Q.begin(); q != NFA_instance.Q.end(); q++) {
// cout << "e-closure of " << *q << " : ";
// for (auto q1 = e_closure[*q].begin(); q1 != e_closure[*q].end(); q1++) {
// cout << *q1 << " ";
// }
// cout << endl;
//}
//求出第一个状态子集
//Dstates[0]对应NFA_instance.start的闭包
set_union(e_closure[NFA_instance.start].begin(), e_closure[NFA_instance.start].end(), temp.begin(), temp.end(), inserter(Dstates[0], Dstates[0].begin()));
int j = 0, p = 0;
//转换矩阵DFA_instance.sigma
for (int i = 0; i < 25; i++) {
for (int j = 0; j < 250; j++) {
DFA_instance.sigma[i][j] = -1;
}
}
//j表示当前处理的状态子集
//p表示总状态子集数
while (j <= p) {
for (auto alpha = DFA_instance.alpha.begin(); alpha != DFA_instance.alpha.end(); alpha++) {
set<int> temp_Dstate;
set<int> e_temp_Dstate;
//得到Dstates[j]有符号边的转换集合temp_Dstate
//Dstates[j]面对每一种符号输入的处理
for (auto q = Dstates[j].begin(); q != Dstates[j].end(); q++) {
if (!NFA_instance.sigma[*q][*alpha].empty()) {
for (auto q1 = NFA_instance.sigma[*q][*alpha].begin(); q1 != NFA_instance.sigma[*q][*alpha].end(); q1++) {
temp_Dstate.insert(*q1);
}
}
}
//test 测试符号闭包
//发现未清空temp_Dstate和e_temp_Dstate导致错误
//cout << "closure of " << j << " with alpha " << *alpha << " : ";
//for (auto q = temp_Dstate.begin(); q != temp_Dstate.end(); q++) {
// cout << *q << " ";
//}
//cout << endl;
//求出temp_Dstate转换集合的e闭包e_temp_Dstate
//转换集合temp_Dstate为空,一定没有产生新状态,产生的e_temp_Dstate是原本的Dstate[j]
//转换集合temp_Dstate不为空,可能产生新状态,需要将产生的新状态与之前的Dstates中记录的对比
if (!temp_Dstate.empty()) {
for (auto state = temp_Dstate.begin(); state != temp_Dstate.end(); state++) {
int index = *state;
set_union(e_closure[index].begin(), e_closure[index].end(), temp.begin(), temp.end(), inserter(e_temp_Dstate, e_temp_Dstate.begin()));
}
//flag 1表示产生新状态
// 0 未产生新状态
int flag = 1;
int index = -1;
//index用于未产生新状态时,标记当前符号输入下产生的状态对应哪个旧状态
for (int i = 0; i <= p; i++) {
set<int> diff;
set_difference(e_temp_Dstate.begin(), e_temp_Dstate.end(), Dstates[i].begin(), Dstates[i].end(), inserter(diff, diff.begin()));
if (diff.empty()) {
flag = 0;
index = i;
break;
}
}
if (flag == 1) {
p++;
set_union(e_temp_Dstate.begin(), e_temp_Dstate.end(), temp.begin(), temp.end(), inserter(Dstates[p], Dstates[p].begin()));
DFA_instance.sigma[j][*alpha] = p;
}
else {
DFA_instance.sigma[j][*alpha] = index;
}
}
else {
DFA_instance.sigma[j][*alpha] = -1;
}
}
//cout << "closure of " << j << " : ";
//for (auto q = Dstates[j].begin(); q != Dstates[j].end(); q++) {
// cout << *q << " ";
//}
//cout << endl;
j++;
}
for (int i = 0; i <= p; i++) {
DFA_instance.Q.insert(i);
}
DFA_instance.start = 0;
for (auto q = NFA_instance.end.begin(); q != NFA_instance.end.end(); q++) {
for (int i = 0; i <= p; i++) {
if (Dstates[i].find(*q) != Dstates[i].end()) {
DFA_instance.end.insert(i);
}
}
}
}
// Prints every transition of DFA_instance to stdout, one per line, in the
// form "<from> --> <symbol> --> <to>", after a "graph DFA" header line.
// A line is prefixed with "(start) " when the transition's target is the
// start state; otherwise it is suffixed with " (end)" when the target is an
// accepting state.
// NOTE(review): both tags are decided by the target state, yet "(start)" is
// printed in front of the source state - confirm this is the intended format.
void printDFA() {
    std::cout << "graph DFA" << std::endl;
    for (const int from : DFA_instance.Q) {
        for (const char symbol : DFA_instance.alpha) {
            const int to = DFA_instance.sigma[from][symbol];
            if (to == -1) {
                continue;  // no transition on this symbol
            }

            const bool into_start = (DFA_instance.start == to);
            // The accepting tag is only considered when the start tag does not apply.
            const bool into_accepting = !into_start && DFA_instance.end.count(to) > 0;

            if (into_start) {
                std::cout << "(start) ";
            }
            std::cout << from << " --> " << symbol << " --> " << to;
            if (into_accepting) {
                std::cout << " (end)";
            }
            std::cout << std::endl;
        }
    }
}