子集法NFA转DFA

      词法分析中重要的一步是NFA的确定化,一般通过子集法来完成。其理论依据是如下定理:设L是由某个NFA接受的正规集,则必存在一个DFA也接受L。

子集法的算法如下:

设NFA为M=(K,Σ,f,S0,Z),则构造相应的DFA M′=(Q,Σ,f′,I0,F)

1取I0=εCLOSURE(S0),并把I0作为一个未加标记的状态加入Q;
2对于状态集Q中任一尚未标记的状态qi={Si1,Si2,…,Sim},Sik∈K,做:
 (1) 标记qi;
 (2) 对于每个a∈Σ,置
     T=f({Si1,Si2,…,Sim},a)
     qj=εCLOSURE(T)
 (3) 若qj不在Q中,则将qj作为一个未加标记的状态添加到Q中;无论qj此前是否已在Q中,都把状态转移f′(qi,a)=qj添加到M′。
3重复进行步骤2,直到Q中不再含有未标记的状态为止。对于由此构造的Q,我们把那些至少含有一个Z中的元素的qi作为M′的终态。


#include<map>
using namespace std;
// One node of a state transition diagram: a node number together with the
// labelled edges leaving that node, stored as (edge character, target node
// number) pairs.
class StateTransitionDiagram
{
public:
	StateTransitionDiagram();//default constructor (node number 0)
	StateTransitionDiagram(int s);//constructor taking the node number
	int getNode();//get the number of this node
	multimap<char, int> getTransitionMatrix();//get a copy of the transitions recorded for this node
	void addTransition(char val,int n);//record an edge labelled val from this node to node n
	void print();//print every recorded edge of this node to standard output
private:
	int node; //the number of the node
	multimap<char, int>transitionMatrix;//outgoing edges: edge character -> target node number (a multimap, so one character may lead to several targets)
};

节点类函数实现:

#include"StateTransitionDiagram.h"
#include<map>
#include<iostream>
using namespace std;

// Default constructor: the node number starts at 0 and no edges are recorded.
StateTransitionDiagram::StateTransitionDiagram()
	: node(0)
{
}

// Constructs a node whose number is n; no edges are recorded yet.
StateTransitionDiagram::StateTransitionDiagram(int n)
	: node(n)
{
}

int StateTransitionDiagram::getNode()
{
	return node;
}

// Returns a copy of the edges recorded for this node
// (edge character -> target node number).
multimap<char, int> StateTransitionDiagram::getTransitionMatrix()
{
	multimap<char, int> edges(this->transitionMatrix);
	return edges;
}

void StateTransitionDiagram::addTransition(char v,int n)
{
	transitionMatrix.insert(make_pair(v,n));
}

void StateTransitionDiagram::print()
{
	for (multimap<char, int>::iterator it = transitionMatrix.begin(); it != transitionMatrix.end(); ++it)
		cout << "node:" << node << "   edge value:" << it->first << "  node:" << it->second << endl;
}

NFA转DFA主要代码:

#include"StateTransitionDiagram.h"
#include<iostream>
#include<deque>
#include<map>
#include<set>
#include<stack>
#include<vector>
using namespace std;

// Character used as the label of an epsilon edge.
#define EPSILON '#'
// An NFA is a deque of pointers to its nodes; each element is a StateTransitionDiagram.
typedef deque<StateTransitionDiagram*>NFA;
// A DFA uses the same representation as an NFA.
typedef deque<StateTransitionDiagram*>DFA;
// Iterator over a set of node numbers.
typedef set<int>::iterator set_it;
// Iterator over the edges (edge character -> target node number) of one node.
typedef multimap<char, int>::iterator multimap_it;
// Determinizes the global nfa with the subset method, filling dfa, dfaTran and dfaT.
void NFAtoDFA();
// Returns s extended with every node reachable from it through EPSILON edges only.
set<int> epsilonClosure(set<int> &s);
// Returns the edges of the NFA node numbered n (empty if no such node exists in nfa).
multimap<char, int> getTranMatrix(int n);
// Returns the set of nodes reached from any node of the given set by one edge labelled with the given character.
set<int>getSet(set<int>&,char);
// Positions `it` on the first entry of dfaTran whose number is negative (not yet handled)
// and returns true; returns false when no such entry exists.
// NOTE(review): the parameter is spelled as a multimap iterator although dfaTran is a map;
// this only works where the library uses the same iterator type for both - confirm, or change
// this prototype and the definition together.
bool getUnhandled(multimap<set<int>, int>::iterator &it);
// DFA states: each key is a set of NFA node numbers, the value is the DFA state number.
// A negative value marks a state that has been discovered but not yet handled.
map<set<int>, int>dfaTran;
// Input alphabet: the characters tried on every DFA state.
set<char> inputChar;
// Numbers of the terminal states of the DFA (filled by NFAtoDFA).
vector<int>dfaT;
// Numbers of the terminal nodes of the NFA.
vector<int>nfaT;

// The NFA to be determinized.
NFA nfa;
// The DFA produced by NFAtoDFA.
DFA dfa;

// Builds a small two-node NFA over the alphabet {a, b}, determinizes it with
// NFAtoDFA() and prints both automata together with their terminal states.
//
// Changes compared with the previous version:
//  - the terminal-state line of the DFA section is now labelled as DFA
//    (it used to repeat the NFA label);
//  - every node allocated with new (the NFA nodes created here and the DFA
//    nodes created by NFAtoDFA) is released before returning.
int main()
{
	// NFA: node 0 --a--> 0, node 0 --b--> 1, node 1 --a--> {1, 0}, node 1 --b--> 0
	StateTransitionDiagram *n0 = new StateTransitionDiagram();
	StateTransitionDiagram *n1 = new StateTransitionDiagram(1);
	n0->addTransition('a',0);
	n0->addTransition('b',1);
	n1->addTransition('a',1);
	n1->addTransition('a', 0);
	n1->addTransition('b', 0);
	inputChar.insert('a');
	inputChar.insert('b');
	nfa.push_back(n0);
	nfa.push_back(n1);
	// node 1 is the only terminal node of the NFA
	nfaT.push_back(1);
	NFAtoDFA();
	cout << "---------------------------NFA----------------------------" << endl;
	for (NFA::iterator it = nfa.begin(); it != nfa.end(); ++it)
		(*it)->print();
	cout << "Terminal statein NFA:";
	for (vector<int>::iterator it=nfaT.begin(); it != nfaT.end(); ++it)
		cout << *it << "   ";
	cout << endl;
	cout << "---------------------------DFA----------------------------" << endl;
	for (DFA::iterator it = dfa.begin(); it != dfa.end(); ++it)
		(*it)->print();
	cout << "Terminal statein DFA:";
	for (vector<int>::iterator it = dfaT.begin(); it != dfaT.end(); ++it)
		cout << *it << "   ";
	cout << endl;
	// The deques hold owning raw pointers: free the nodes and drop the dangling pointers.
	for (NFA::iterator it = nfa.begin(); it != nfa.end(); ++it)
		delete *it;
	nfa.clear();
	for (DFA::iterator it = dfa.begin(); it != dfa.end(); ++it)
		delete *it;
	dfa.clear();
	return 0;
}

// Looks up the NFA node whose number is n and returns a copy of its edges.
// When the global nfa contains no such node, an empty multimap is returned.
multimap<char, int> getTranMatrix(int n)
{
	for (NFA::size_type idx = 0; idx < nfa.size(); ++idx)
	{
		StateTransitionDiagram *candidate = nfa[idx];
		// the first node with a matching number wins
		if (candidate->getNode() == n)
			return candidate->getTransitionMatrix();
	}
	return multimap<char, int>();
}

// Computes the epsilon closure of s: s itself plus every node that can be
// reached from a node of s by following EPSILON edges only.
// s is not modified; the closure is returned as a new set.
set<int>epsilonClosure(set<int> &s)
{
	set<int> reached(s);      // the closure always contains the starting nodes
	stack<int> pending;       // nodes whose outgoing edges still have to be inspected
	for (set_it seed = s.begin(); seed != s.end(); ++seed)
		pending.push(*seed);
	while (!pending.empty())
	{
		const int from = pending.top();
		pending.pop();
		multimap<char, int> edges = getTranMatrix(from);
		// only the edges labelled EPSILON matter here
		pair<multimap_it, multimap_it> eps = edges.equal_range(EPSILON);
		for (multimap_it e = eps.first; e != eps.second; ++e)
		{
			// insert().second is true only when the target was not in the closure yet;
			// such a node is new and must be expanded later
			if (reached.insert(e->second).second)
				pending.push(e->second);
		}
	}
	return reached;
}

// Returns the set of nodes reached from any node of s by a single edge
// labelled ch. Epsilon edges are not followed here. s is not modified.
set<int>getSet(set<int> &s, char ch)
{
	set<int> targets;
	for (set_it src = s.begin(); src != s.end(); ++src)
	{
		multimap<char, int> edges = getTranMatrix(*src);
		// all edges of this node that carry the label ch
		pair<multimap_it, multimap_it> hits = edges.equal_range(ch);
		for (multimap_it e = hits.first; e != hits.second; ++e)
			targets.insert(e->second);
	}
	return targets;
}

// Scans dfaTran from the beginning for the first entry whose number is
// negative, i.e. a state that has not been handled yet.
// On success `it` refers to that entry and true is returned; otherwise `it`
// equals dfaTran.end() and false is returned.
bool getUnhandled(multimap<set<int>, int>::iterator &it)
{
	it = dfaTran.begin();
	while (it != dfaTran.end() && it->second >= 0)
		++it;
	return it != dfaTran.end();
}

void NFAtoDFA()
{
	//initiate dfa with the initial state of nfa
	set<int> s;
	s.insert(0);
	dfaTran.insert(make_pair(s,0));
	map<set<int>, int>::iterator map_it = dfaTran.begin();
	//constructor the DFA
	do
	{
		s = map_it->first;
		map_it->second = abs(map_it->second);
		//create a new node, if map_it is negtive,the state is not handled,otherwise,handled
		StateTransitionDiagram *sTranNode = new StateTransitionDiagram(map_it->second);
		dfa.push_back(sTranNode);
		//find the terminal node
		for (vector<int>::iterator it = nfaT.begin(); it != nfaT.end(); ++it)
		{
			if (s.find(*it) != s.end())
			{
				dfaT.push_back(map_it->second);
			}
		}
		
		for (set<char>::iterator it = inputChar.begin(); it != inputChar.end(); ++it)
		{
			set<int>tempSet = epsilonClosure(getSet(s,*it));
			if (!tempSet.empty())//if the set is empty,then no edge
			{
				if (dfaTran.find(tempSet) == dfaTran.end())
				{
					dfaTran.insert(make_pair(tempSet,-(int)dfaTran.size()));
				}
				map_it = dfaTran.find(tempSet);
				sTranNode->addTransition(*it,abs(map_it->second));
			}
		}

	} while (getUnhandled(map_it));
}



代码的实现用到了很多STL里面的类库,STL确实很强大,节省了很多的开发时间,但要灵活使用STL还需要更多的实践经验。编译原理解释了程序的执行原理,如果能理解透彻其中的深层次原理,对编程本身也是一种很大的提升;如果能自己去构造一个编译器,亲身去实现其中的算法,收获之大可想而知。不过这确实需要很大的耐心,对编程能力也有很高的要求!少年,任重而道远,加油吧!


你可能感兴趣的:(STL,词法分析,DFA,nfa,子集法)