词法分析中重要的一步是NFA的确定化,通常用子集法来完成。其理论依据是如下定理:设L是由某个NFA接受的正规集,则存在一个DFA接受L。
子集法的算法如下:
设NFA为M=(K,Σ,f,S0,Z),则构造相应的DFA M′=(Q,Σ,f′,I0,F)
1. 取I0=ε-CLOSURE(S0),并把I0作为未标记的状态加入Q;
2. 对于状态集Q中任一尚未标记的状态qi={Si1,Si2,…,Sim},Sik∈K,做:
(1) 标记qi;
(2) 对于每个a∈Σ,置
T=f({Si1,Si2,…,Sim},a)
qj=ε-CLOSURE(T)
(3) 若qj不在Q中,则将qj作为一个未加标记的状态添加到Q中;无论qj是否为新状态,都把状态转移f′(qi,a)=qj添加到M′。
3. 重复进行步骤2,直到Q中不再含有未标记的状态为止。对于由此构造的Q,我们把那些至少含有一个Z中的元素的qi作为M′的终态,这些终态构成集合F。
#ifndef STATE_TRANSITION_DIAGRAM_H
#define STATE_TRANSITION_DIAGRAM_H

#include<map>

// NOTE: this header intentionally does not contain `using namespace std;`.
// A using-directive in a header leaks into every file that includes it;
// standard names are qualified with std:: here instead.

// One node of a state transition diagram together with its outgoing edges.
// Each edge is stored as (input character -> target node number); a
// multimap is used so that one character may lead to several targets.
class StateTransitionDiagram
{
public:
    // Constructor; the node number becomes 0.
    StateTransitionDiagram();

    // Constructor with an initial value: `s` becomes the node number.
    StateTransitionDiagram(int s);

    // Get the number of this node.
    int getNode();

    // Get a copy of the transition matrix (all outgoing edges) of this node.
    std::multimap<char, int> getTransitionMatrix();

    // Add an outgoing edge labelled `val` that leads to node number `n`.
    void addTransition(char val, int n);

    // Print every outgoing edge of this node.
    void print();

private:
    int node;                                   // the number of the node
    std::multimap<char, int> transitionMatrix;  // records the state transitions
};

#endif // STATE_TRANSITION_DIAGRAM_H
#include"StateTransitionDiagram.h"
#include<map>
#include<iostream>
using namespace std;

// Default construction: the node is numbered 0.
StateTransitionDiagram::StateTransitionDiagram()
    : node(0)
{
}

// Construct a node carrying the number `n`.
StateTransitionDiagram::StateTransitionDiagram(int n)
    : node(n)
{
}

// Number of this node.
int StateTransitionDiagram::getNode()
{
    return node;
}

// Copy of every outgoing edge of this node (edge character -> target node).
multimap<char, int> StateTransitionDiagram::getTransitionMatrix()
{
    return transitionMatrix;
}

// Record an edge labelled `v` from this node to node `n`.
void StateTransitionDiagram::addTransition(char v, int n)
{
    transitionMatrix.insert(multimap<char, int>::value_type(v, n));
}

// Write one line per outgoing edge to standard output.
void StateTransitionDiagram::print()
{
    typedef multimap<char, int>::const_iterator EdgeIter;

    EdgeIter edge = transitionMatrix.begin();
    const EdgeIter last = transitionMatrix.end();
    while (edge != last)
    {
        const char label = edge->first;
        const int target = edge->second;
        cout << "node:" << node << " edge value:" << label << " node:" << target << endl;
        ++edge;
    }
}
#include"StateTransitionDiagram.h"
#include<cstdlib>
#include<deque>
#include<iostream>
#include<map>
#include<set>
#include<stack>
#include<vector>
using namespace std;

#define EPSILON '#' // edge label that stands for an epsilon (empty-input) move

typedef deque<StateTransitionDiagram*> NFA; // an NFA is a deque of node pointers
typedef deque<StateTransitionDiagram*> DFA; // a DFA uses the same representation
typedef set<int>::iterator set_it;
typedef multimap<char, int>::iterator multimap_it;

void NFAtoDFA();                                     // determinize the NFA with the subset method
set<int> epsilonClosure(const set<int> &s);          // epsilon closure of a set of NFA nodes
multimap<char, int> getTranMatrix(int n);            // transition matrix of NFA node n
set<int> getSet(const set<int>&, char);              // nodes reachable on one input character
bool getUnhandled(map<set<int>, int>::iterator &it); // locate a DFA state not yet handled

// Maps each discovered subset of NFA nodes to its DFA state number.
// A negative value marks a state that has been discovered but not yet
// handled; the absolute value is the state number.
map<set<int>, int> dfaTran;
set<char> inputChar; // input alphabet
vector<int> dfaT;    // accepting states of the DFA
vector<int> nfaT;    // accepting states of the NFA
NFA nfa;             // nfa
DFA dfa;             // dfa

// Builds a two-node sample NFA, determinizes it and prints both automata.
int main()
{
    StateTransitionDiagram *n0 = new StateTransitionDiagram();
    StateTransitionDiagram *n1 = new StateTransitionDiagram(1);
    n0->addTransition('a', 0);
    n0->addTransition('b', 1);
    n1->addTransition('a', 1);
    n1->addTransition('a', 0);
    n1->addTransition('b', 0);
    inputChar.insert('a');
    inputChar.insert('b');
    nfa.push_back(n0);
    nfa.push_back(n1);
    nfaT.push_back(1);

    NFAtoDFA();

    cout << "---------------------------NFA----------------------------" << endl;
    for (NFA::iterator it = nfa.begin(); it != nfa.end(); ++it)
        (*it)->print();
    cout << "Terminal statein NFA:";
    for (vector<int>::iterator it = nfaT.begin(); it != nfaT.end(); ++it)
        cout << *it << " ";
    cout << endl;

    cout << "---------------------------DFA----------------------------" << endl;
    for (DFA::iterator it = dfa.begin(); it != dfa.end(); ++it)
        (*it)->print();
    // This line lists the DFA's accepting states, so it is labelled DFA.
    cout << "Terminal statein DFA:";
    for (vector<int>::iterator it = dfaT.begin(); it != dfaT.end(); ++it)
        cout << *it << " ";
    cout << endl;

    // Every node was allocated with new (here and in NFAtoDFA); release them.
    for (NFA::iterator it = nfa.begin(); it != nfa.end(); ++it)
        delete *it;
    nfa.clear();
    for (DFA::iterator it = dfa.begin(); it != dfa.end(); ++it)
        delete *it;
    dfa.clear();
    return 0;
}

// Returns a copy of the transition matrix of the NFA node numbered n,
// or an empty multimap when the NFA holds no such node.
multimap<char, int> getTranMatrix(int n)
{
    multimap<char, int> multim;
    // find the position of the node n
    for (NFA::iterator it = nfa.begin(); it != nfa.end(); ++it)
    {
        if (n == (*it)->getNode())
        {
            multim = (*it)->getTransitionMatrix();
            break;
        }
    }
    return multim;
}

// Returns s together with every NFA node reachable from s through
// EPSILON edges only. Taking s by const reference lets callers pass a
// temporary such as the result of getSet().
set<int> epsilonClosure(const set<int> &s)
{
    set<int> closureSet = s; // first, add the initial set to the closure set
    stack<int> nodeStack;    // nodes whose epsilon edges still have to be followed
    for (set_it it = closureSet.begin(); it != closureSet.end(); ++it)
        nodeStack.push(*it);

    while (!nodeStack.empty())
    {
        // take the node on top of the stack
        const int currentNode = nodeStack.top();
        nodeStack.pop();

        multimap<char, int> tranMax = getTranMatrix(currentNode);
        for (multimap_it it = tranMax.begin(); it != tranMax.end(); ++it)
        {
            // follow an epsilon edge only when its target is not in the set yet
            if (it->first == EPSILON && closureSet.find(it->second) == closureSet.end())
            {
                closureSet.insert(it->second);
                nodeStack.push(it->second);
            }
        }
    }
    return closureSet;
}

// Returns the set of NFA nodes reached from any node of s by a single
// edge labelled ch (epsilon edges are not followed here).
set<int> getSet(const set<int> &s, char ch)
{
    set<int> ss;
    for (set<int>::const_iterator node = s.begin(); node != s.end(); ++node)
    {
        multimap<char, int> mmap = getTranMatrix(*node);
        // every edge matching ch contributes its target node
        pair<multimap_it, multimap_it> matching = mmap.equal_range(ch);
        for (multimap_it it = matching.first; it != matching.second; ++it)
            ss.insert(it->second);
    }
    return ss;
}

// Scans dfaTran for a state that is still marked unhandled (negative).
// Returns true and leaves `it` on that entry when one exists; otherwise
// returns false with `it` equal to dfaTran.end().
bool getUnhandled(map<set<int>, int>::iterator &it)
{
    for (it = dfaTran.begin(); it != dfaTran.end(); ++it)
    {
        if (it->second < 0)
            return true;
    }
    return false;
}

// Subset construction: fills dfa, dfaT and dfaTran from nfa, nfaT and
// inputChar. NFA node 0 is taken as the start node.
void NFAtoDFA()
{
    // The DFA start state is the epsilon closure of the NFA start node,
    // so epsilon edges leaving node 0 are honoured as well.
    set<int> start;
    start.insert(0);
    start = epsilonClosure(start);
    dfaTran.insert(make_pair(start, 0));

    map<set<int>, int>::iterator map_it = dfaTran.begin();
    do
    {
        const set<int> s = map_it->first;
        // dropping the sign marks the state as handled
        map_it->second = abs(map_it->second);
        const int stateId = map_it->second;

        StateTransitionDiagram *sTranNode = new StateTransitionDiagram(stateId);
        dfa.push_back(sTranNode);

        // the state is accepting when it contains at least one accepting
        // NFA node; record it once even if it contains several
        for (vector<int>::iterator it = nfaT.begin(); it != nfaT.end(); ++it)
        {
            if (s.find(*it) != s.end())
            {
                dfaT.push_back(stateId);
                break;
            }
        }

        for (set<char>::iterator it = inputChar.begin(); it != inputChar.end(); ++it)
        {
            const set<int> tempSet = epsilonClosure(getSet(s, *it));
            if (tempSet.empty()) // if the set is empty, then no edge
                continue;

            map<set<int>, int>::iterator target = dfaTran.find(tempSet);
            if (target == dfaTran.end())
            {
                // new state: number it with the current map size, negated
                // to mark it as not handled yet
                target = dfaTran.insert(
                    make_pair(tempSet, -static_cast<int>(dfaTran.size()))).first;
            }
            sTranNode->addTransition(*it, abs(target->second));
        }
    } while (getUnhandled(map_it));
}
代码的实现用到了很多STL里面的类库,STL确实很强大,节省了很多的开发时间,灵活使用STL需要更多的实践经验。编译原理解释了程序的执行原理,如果能理解透彻其中的深层次原理,对编程本身也是一种很大的提升;如果能自己去构造一个编译器,亲身去实现其中的算法,收获可想而知。不过这确实需要很大的耐心,对编程能力也有很高的要求!少年,任重而道远,加油吧!