前天在公司看到LUA的源代码分析,本来学编译原理的时候就很感兴趣,希望自己弄个解释型语言玩,当时从词法到语法的自动生成,全部自己过了一遍,可惜上课到LR后就不认真听了,谁让跟老师混熟了, 哈哈~~ 原来的代码也只写到 LR 语法自动生成...
后来发了下文章,几个小DD,MM要让俺发代码,不过我就把SLR发给了某个DD,其他的全部不理,其实不是不理,是不想让人知道我的代码多难看 - -!!!
自从做了测试,很多时候都只是为了工作方便写点小东西玩,现在看到LUA突然让我又想起了我的编译原理,于是本周末花了一晚上和一白天把原来的代码过了一下,整理出来一份简洁的词法分析,仅仅是把功能差不多实现了,至于错误嘛,还没时间看, DFA的化简,睡了一晚上才想好步骤,因为原来的步骤耗时,无用功和搜索浪费时间,最主要是编译原理课本上只说了几句话,靠~ (化简没写完,因为要加班去了,55555),改天更新(得周末)
下面是简单的实现,包括 连接,或,闭包,以及从一个字符生成NFA. 不过还没写从一个正则表达式直接生成DFA的部分,不过很简单,就是一个算术表达式求值的过程而已了.
贡献给学习编译原理的小DDMM, 至于语法分析的,估计得另一个周末再放送, HOHO
// Reg2DFA.cpp : Defines the entry point for the console application.
//
/************************************************************************/
/* */
/* 作者 : 陈正 */
/* 电邮 : [email protected] */
/* 说明 : 正则表达式->NFA->DFA->化简 */
/* */
/************************************************************************/
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <vector>
// Total number of legal input characters; also the length of every per-state
// transition row allocated by DFA/NFA (new ...[MAX_LETTER_COUNT]). [3/22/2008 win]
// NOTE(review): rows are indexed directly by the character code, so code 127
// (and any negative char value) would fall outside a row - confirm callers
// never pass such characters.
#define MAX_LETTER_COUNT 127
// Minimal growable array of int. [3/22/2008 win]
//
// Owns a heap buffer of m_capacity ints, of which the first m_count are in
// use. Element access is unchecked; insert() silently ignores bad positions.
class IntArr
{
public:
    // Creates an array holding `count` zero-initialised elements
    // (an empty array when count <= 0).
    IntArr(int count = 0)
        : m_pInts(0), m_count(0), m_capacity(0)
    {
        if (count > 0)
        {
            m_pInts = new int[count];
            memset(m_pInts, 0, sizeof(int) * count);
            m_capacity = count;
            m_count = count;
        }
    }

    // Deep copy. Without this the compiler-generated copy would share the
    // buffer and both destructors would delete it.
    IntArr(const IntArr& a)
        : m_pInts(0), m_count(0), m_capacity(0)
    {
        *this = a;
    }

    ~IntArr()
    {
        delete[] m_pInts;
        m_pInts = 0;
        m_count = 0;
        m_capacity = 0;
    }

    // Unchecked element access.
    int& operator[](int idx)
    {
        return m_pInts[idx];
    }
    const int& operator[](int idx)const
    {
        return m_pInts[idx];
    }

    // Three-way comparison: shorter arrays order first; arrays of equal
    // length are ordered by memcmp over their elements.
    // Returns <0, 0 or >0. Two empty arrays compare equal (0).
    friend int compare(const IntArr& a1, const IntArr& a2)
    {
        if (a1.m_count < a2.m_count)
            return -1;
        if (a1.m_count > a2.m_count)
            return 1;
        if (a1.m_count == 0)
            return 0;   // both empty; also avoids memcmp on null buffers
        return memcmp(a1.m_pInts, a2.m_pInts, sizeof(int) * a1.m_count);
    }

    // Inserts val before position idx (idx == count() appends).
    // Out-of-range positions are ignored.
    void insert(int idx, int val)
    {
        if (idx < 0 || idx > m_count)
            return;
        if (m_count + 1 > m_capacity)
            grow();
        // Shift the tail one slot to the right to open a gap at idx.
        if (idx < m_count)
            memmove(m_pInts + idx + 1, m_pInts + idx, sizeof(int) * (m_count - idx));
        m_pInts[idx] = val;
        m_count++;
    }

    // Appends val after the last element.
    void append(int val)
    {
        if (m_count + 1 > m_capacity)
            grow();
        m_pInts[m_count] = val;
        m_count++;
    }

    // Drops all elements but keeps the allocated capacity.
    inline void clean()
    {
        m_count = 0;
    }

    // Copies the elements of `a`, reallocating only when this buffer is smaller.
    IntArr& operator=(const IntArr& a)
    {
        if (this == &a)
            return *this;
        if (m_capacity < a.m_capacity)
        {
            int* fresh = new int[a.m_capacity];
            delete[] m_pInts;
            m_pInts = fresh;
            m_capacity = a.m_capacity;
        }
        m_count = a.m_count;
        if (m_capacity > 0)
            memset(m_pInts, 0, sizeof(int) * m_capacity);
        if (m_count > 0)
            memcpy(m_pInts, a.m_pInts, sizeof(int) * m_count);
        return *this;
    }

    // Number of elements in use.
    inline int count()const{return m_count;}

private:
    // Enlarges the buffer by a fixed step, preserving the current elements.
    void grow()
    {
        static const int growStep = 20;
        const int newCapacity = m_capacity + growStep;
        int* fresh = new int[newCapacity];
        memset(fresh, 0, sizeof(int) * newCapacity);
        if (m_pInts)
        {
            memcpy(fresh, m_pInts, sizeof(int) * m_count);
            delete[] m_pInts;
        }
        m_pInts = fresh;
        m_capacity = newCapacity;
    }

    int* m_pInts;     // owned buffer (null while capacity is 0)
    int  m_count;     // elements in use
    int  m_capacity;  // allocated elements
};
// Minimal growable array of untyped (void*) pointers. [3/22/2008 win]
//
// The array never owns what the pointers refer to; callers delete the
// pointees themselves. Pointers are stored at full width, so the container
// also works where sizeof(void*) != sizeof(int).
class PtArr
{
public:
    // Creates an array of `count` null pointers (empty when count <= 0).
    PtArr(int count = 0)
        : m_arr(count > 0 ? static_cast<size_t>(count) : 0, static_cast<void*>(0))
    {}

    // Unchecked access. The const overload still hands out a writable slot.
    inline void*& operator[](int idx)const
    {
        return const_cast<PtArr*>(this)->m_arr[idx];
    }
    inline void*& operator[](int idx)
    {
        return m_arr[idx];
    }

    // Inserts pt before position idx (idx == count() appends).
    // Out-of-range positions are ignored.
    inline void insert(int idx, void* pt)
    {
        if (idx < 0 || idx > count())
            return;
        m_arr.insert(m_arr.begin() + idx, pt);
    }

    // Appends pt after the last element.
    inline void append(void* pt)
    {
        m_arr.push_back(pt);
    }

    // Number of stored pointers.
    inline int count()const{return static_cast<int>(m_arr.size());}

private:
    std::vector<void*> m_arr;
};
// An automaton state, identified by a non-negative integer.
// A negative id (the default, -1) marks "no state".
class State
{
public:
    // Default state is invalid (-1).
    State() : m_s(-1) {}
    // Wraps an existing state id; intentionally implicit so plain ints can
    // be used wherever a State is expected.
    State(int s) : m_s(s) {}

    // Reads back the raw id.
    operator int()const
    {
        return m_s;
    }

    // True when the id denotes a real state.
    inline bool valid()const{return !(m_s < 0);}
    // Puts the state back into the invalid (-1) condition.
    inline void reset(){m_s = -1;}

    int m_s;
};
// Ordered set of states. Values are stored relative to m_offset: a stored
// value v stands for the state v + m_offset (see getAt/add/find).
class StateCollection
{
public:
StateCollection();
~StateCollection();
// Fetches the state at position idx into outS; returns false when idx is out of range. [3/22/2008 win]
bool getAt(int idx, State& outS)const;
// Number of states in the set. [3/21/2008 win]
inline int count()const{return m_states.count();}
// Adds a state; returns false when the state was already present. [3/21/2008 win]
bool add(const State& s);
// Looks a state up; returns its index, or -1 when absent. [3/21/2008 win]
int find(const State& s);
// True when the state is present. [3/21/2008 win]
inline bool exist(const State& s){return (find(s)>=0);}
// True when this set and s share at least one state. [3/22/2008 win]
bool intersect(const StateCollection& s);
// Shifts every state in the set by offs. [3/22/2008 win]
// Keeping one offset avoids walking the set to bump each stored value;
// note that the offset accumulates (+=) rather than being replaced. [3/22/2008 win]
inline void setOffset(int offs){m_offset += offs;}
// Removes all states (the offset is left unchanged). [3/21/2008 win]
inline void clean(){m_states.clean();};
// Comparison and assignment. [3/22/2008 win]
friend int compare(const StateCollection& sc1, const StateCollection& sc2);
StateCollection& operator=(const StateCollection& sc);
// Prints the set. [3/22/2008 win]
void print();
private:
IntArr m_states;
// Offset applied to the stored values. [3/22/2008 win]
// A stored 0 stands for the actual state 0 + m_offset. [3/22/2008 win]
int m_offset;
};
// A new collection has no states and a zero offset.
StateCollection::StateCollection()
    : m_offset(0)
{
}
// Resets the offset on destruction; m_states is destroyed as a member.
StateCollection::~StateCollection()
{
    this->m_offset = 0;
}
int compare(const StateCollection& sc1, const StateCollection& sc2)
{
return compare(sc1.m_states,sc2.m_states);
}
// Copies both the offset and the stored states of sc; self-assignment is a no-op.
StateCollection& StateCollection::operator=(const StateCollection& sc)
{
    if (this != &sc)
    {
        m_offset = sc.m_offset;
        m_states = sc.m_states;
    }
    return *this;
}
bool StateCollection::getAt(int idx, State& outS)const
{
if(idx < 0 || idx >= m_states.count())
{
#ifdef KAVA_DEBUG
printf("ERROR : bool StateCollection::getAt : idx < 0 || idx >= m_count");
#endif
return false;
}
outS = m_states[idx] + m_offset;
return true;
}
// 返回状态集合是否存在交集 [3/22/2008 win]
bool StateCollection::intersect(const StateCollection& s)
{
int tCount = count(), sCount = s.count();
if(tCount== 0 || sCount == 0)
return false;
// 如果我最小的大于s中最大的,则没有交集 [3/22/2008 win]
if(m_states[0] > s.m_states[sCount-1])
return false;
if(m_states[tCount-1] < s.m_states[0])
return false;
int itThis=0, itS=0;
while(itThis < tCount && itS < sCount)
{
if(m_states[itThis] == s.m_states[itS])
return true;
if(m_states[itThis] < s.m_states[itS])
itThis ++;
else
itS ++;
}
return false;
}
// 加入状态,保证有序,false表示存在 [3/21/2008 win]
bool StateCollection::add(const State& _s)
{
int s = _s - m_offset;
int tCount = count();
if(tCount == 0)
{
m_states.insert(0,s);
return true;
}
// 插入排序的思想进行插入,之前觉得因为状态都比原来的大,插入会快些,不过不一定,改天想想再2分(不用2分是因为2分需要分啊分啊分 每次都LOGN) [3/21/2008 win]
int end = tCount-1;
while(end >= 0)
{
if(s == m_states[end])
return false;
if(s > m_states[end])
break;
end --;
}
// 第end+1位置放置s [3/21/2008 win]
m_states.insert(end+1,s);
return true;
}
// 因为加入操作保证了有序,使用2分查找 [3/21/2008 win]
int StateCollection::find(const State& _s)
{
int s = _s - m_offset;
int tCount = count();
if(tCount <= 0)
return -1;
if(tCount == 1)
{
if(s == m_states[0])
return 0;
return -1;
}
int beg = 0, end = tCount-1;
do
{
int mid = (end+beg)/2;
if(m_states[mid] == s)
return mid;
else if(m_states[mid] < s)
beg = mid+1;
else end = mid-1;
}while(beg <= end);
// 没找到 [3/21/2008 win]
return -1;
}
// 输出 [3/22/2008 win]
void StateCollection::print()
{
printf("(");
int tCount = count();
for(int i=0; i
printf("%d,",m_states[i]);
}
if(tCount>0)
printf("%d",m_states[tCount-1]);
printf(")");
}
// One input symbol of an automaton. [3/22/2008 win]
// The reserved value s_empty stands for the empty word (epsilon).
class Letter
{
public:
    // A default-constructed letter is the empty word.
    Letter() : m_c(s_empty) {}
    // Intentionally implicit so that plain chars can be passed as letters.
    Letter(char c) : m_c(c) {}

    // Reads back the raw character.
    operator char()const{return m_c;}

    char m_c;

    // The empty word. [3/21/2008 win]
    static const char s_empty;
};
const char Letter::s_empty = 8;
// Ordered set of letters (the alphabet of an automaton). [3/22/2008 win]
class LetterCollection
{
public:
LetterCollection();
~LetterCollection();
// Number of letters in the set. [3/21/2008 win]
inline int count()const{return m_letters.count();}
// Fetches the letter at position idx into outLt; returns false when idx is out of range. [3/22/2008 win]
bool getAt(int idx, Letter& outLt)const;
// Adds a letter; returns false when the letter was already present. [3/21/2008 win]
bool add(const Letter& lt);
// Looks a letter up; returns its index, or -1 when absent. [3/21/2008 win]
int find(const Letter& lt);
// True when the letter is present. [3/21/2008 win]
inline bool exist(const Letter& lt){return (find(lt)>=0);}
// Merges every letter of lc into this set. [3/22/2008 win]
void unionC(const LetterCollection& lc);
// Removes all letters. [3/22/2008 win]
inline void clean() {m_letters.clean();};
private:
IntArr m_letters;
};
// Default constructor: no explicit work; the m_letters member default-constructs itself.
LetterCollection::LetterCollection()
{
}
// Destructor: no explicit work; the m_letters member is destroyed automatically.
LetterCollection::~LetterCollection()
{
}
// 合并字符表 [3/22/2008 win]
void LetterCollection::unionC(const LetterCollection& lc)
{
if(this == &lc)
return;
int c = lc.count();
for(int i=0; i
add(lc.m_letters[i]);
}
}
// Writes the letter at position idx to outLt. [3/22/2008 win]
// Returns false, leaving outLt untouched, when idx is out of range.
bool LetterCollection::getAt(int idx, Letter& outLt)const
{
    const bool inRange = (idx >= 0) && (idx < count());
    if (inRange)
    {
        outLt = m_letters[idx];
        return true;
    }
#ifdef KAVA_DEBUG
    printf("ERROR : bool LetterCollection::getAt : idx < 0 || idx >= m_count");
#endif
    return false;
}
// 加入状态,保证有序 [3/21/2008 win]
bool LetterCollection::add(const Letter& lt)
{
int tCount = count();
if(tCount == 0)
{
m_letters.insert(0,lt);
return true;
}
// 插入排序的思想进行插入(不用2分是因为2分需要分啊分啊分 每次都LOGN) [3/21/2008 win]
int end = tCount-1;
while(end >= 0)
{
if(lt == m_letters[end])
return false;
if(lt > m_letters[end])
break;
end --;
}
// 第end+1位置放置s [3/21/2008 win]
m_letters.insert(end+1,lt);
return true;
}
// 因为加入操作保证了有序,使用2分查找 [3/21/2008 win]
int LetterCollection::find(const Letter& lt)
{
int tCount = count();
if(tCount <= 0)
return -1;
if(tCount == 1)
{
if(lt == m_letters[0])
return 0;
return -1;
}
int beg = 0, end = tCount-1;
do
{
int mid = (end+beg)/2;
if(m_letters[mid] == lt)
return mid;
else if(m_letters[mid] < lt)
beg = mid+1;
else end = mid-1;
}while(beg <= end);
// 没找到 [3/21/2008 win]
return -1;
}
// Deterministic finite automaton. [3/21/2008 win]
// States are numbered 0 .. m_allStates-1 in creation order (see newState).
class DFA
{
public:
friend class NFA;
DFA();
~DFA();
// Records the transition: state s1, on letter lt, goes to state s2. [3/21/2008 win]
void setMap(const State& s1, const Letter& lt,const State& s2);
// Looks up the single target state for (s1, lt) into s2;
// returns false when s1 is out of range or no transition is recorded. [3/21/2008 win]
bool getMap(const State& s1, const Letter& lt, State& s2)const;
// Resets the automaton to empty. [3/22/2008 win]
inline void reset(){cleanUp();}
// Minimisation. [3/22/2008 win]
void reduction(DFA& out);
// Prints the automaton. [3/22/2008 win]
void print();
State m_startState; // start state [3/21/2008 win]
StateCollection m_endStates; // set of accepting states [3/21/2008 win]
int m_allStates; // number of states created so far [3/21/2008 win]
LetterCollection m_letters; // alphabet [3/21/2008 win]
private:
// For simplicity only ASCII letters are considered, i.e. 127 of them
// (the 33 control characters could probably be dropped too ^_^). [3/21/2008 win]
// One entry per state; each entry points to a State[MAX_LETTER_COUNT] row
// indexed by letter (allocated in newState/newStates).
PtArr m_pMap;
inline State* getMapItem(int idx)const{return (State*)m_pMap[idx];}
// Releases resources. [3/21/2008 win]
void cleanUp();
// Marks s as the start state. [3/21/2008 win]
void setStartState(const State& s);
// Marks s as an accepting state. [3/21/2008 win]
void setEndState(const State& s);
// Creates new state(s); state ids are local to one automaton. [3/21/2008 win]
void newState(State& outS);
void newStates(int count);
};
// Builds an empty automaton by running the shared cleanUp() reset.
// NOTE(review): cleanUp() runs here before m_allStates has been given a
// value - confirm cleanUp() never reads it before assigning it.
DFA::DFA()
{
cleanUp();
}
// Releases everything through cleanUp().
DFA::~DFA()
{
cleanUp();
}
void DFA::cleanUp()
{
for(int i=0; i
m_allStates = 0;
m_startState.reset();
m_endStates.clean();
m_letters.clean();
}
// Creates one new state and returns its id in outS. [3/21/2008 win]
// Ids are only meaningful inside this automaton. The new state gets a fresh
// transition row whose entries are all default (invalid) states.
void DFA::newState(State& outS)
{
    outS = State(m_allStates);
    m_allStates += 1;
    State* const row = new State[MAX_LETTER_COUNT];
    m_pMap.append(row);
}
// Creates `count` new states at once, each with its own empty transition row.
void DFA::newStates(int count)
{
    m_allStates += count;
    for (int remaining = count; remaining > 0; --remaining)
    {
        m_pMap.append(new State[MAX_LETTER_COUNT]);
    }
}
void DFA::setMap(const State& s1, const Letter& lt,const State& s2)
{
int idx1 = s1;
int idx2 = s2;
if(idx1<0 || idx2 <0)
{
#ifdef KAVA_DEBUG
printf("ERROR : void DFA::setMapVal(State& s1, Letter& lt, State& s2) : idx1<0 || idx2 <0");
#endif
return ;
}
if(idx1 >= m_allStates || idx2 >= m_allStates )
{
#ifdef KAVA_DEBUG
printf("ERROR : void DFA::setMapVal(State& s1, Letter& lt, State& s2) : idx1 >= m_allStates.count() || idx2 >= m_allStates.count()");
#endif
return ;
}
getMapItem(idx1)[lt] = (idx2);
}
bool DFA::getMap(const State& s1, const Letter& lt, State& s2)const
{
int idx1 = s1 ;
if(idx1 < 0 || idx1 >= m_allStates)
return false;
s2 = getMapItem(idx1)[lt];
return (s2 >= 0);
}
// Marks s as the start state. [3/21/2008 win]
// Ignored when s is not a state of this automaton.
void DFA::setStartState(const State& s)
{
    const int idx = s;
    const bool known = (idx >= 0) && (idx < m_allStates);
    if (known)
    {
        m_startState = s;
        return ;
    }
#ifdef KAVA_DEBUG
    printf("ERROR : void DFA::setStartState : idx < 0 || idx >= m_allStates ");
#endif
}
// Marks s as an accepting state. [3/21/2008 win]
// Ignored when s is not a state of this automaton.
void DFA::setEndState(const State& s)
{
    const int idx = s;
    const bool known = (idx >= 0) && (idx < m_allStates);
    if (known)
    {
        m_endStates.add(s);
        return ;
    }
#ifdef KAVA_DEBUG
    printf("ERROR : void DFA::setEndState : idx < 0 || idx >= m_allStates ");
#endif
}
// Minimisation (work in progress). [3/22/2008 win]
// Only the first step exists so far: the states are split into the accepting
// and the non-accepting group. The refinement of those groups and the
// construction of `out` are not implemented yet, so `out` is left untouched.
// The two groups live on the stack, so nothing is leaked when the function
// returns.
void DFA::reduction(DFA& out)
{
    (void)out;   // TODO: build the minimised automaton once refinement exists

    // Groups of equivalent states found so far (non-owning pointers).
    PtArr statesArr;
    // Maps a state to the index of its group inside statesArr. [3/22/2008 win]
    IntArr state2idx(m_allStates);

    // Initial partition: accepting vs. non-accepting. [3/22/2008 win]
    StateCollection endGroup;
    StateCollection notEndGroup;
    endGroup = m_endStates;

    // m_endStates is sorted, so a single cursor (endit) walks it in step
    // with the state counter i.
    for (int i = 0, endit = 0; i < m_allStates; i++)
    {
        if (endit < endGroup.count())
        {
            State ts;
            endGroup.getAt(endit, ts);
            if (i == ts)
            {
                state2idx[i] = 0;   // accepting group
                endit++;
                continue;
            }
        }
        notEndGroup.add(i);
        state2idx[i] = 1;           // non-accepting group
    }
    statesArr.append(&endGroup);
    statesArr.append(&notEndGroup);
}
// 输出信息 [3/22/2008 win]
void DFA::print()
{
int i=0;
printf("DFA 所有状态: ");
for(i=0; i
printf("%d ",i );
}
printf("/n");
printf("初态: ");
printf("%d ",m_startState);
printf("/n");
printf("终态: ");
for(i=0; i
State s;
m_endStates.getAt(i,s);
printf("%d ",s);
}
printf("/n");
printf("字符表: ");
for(i=0; i
Letter lt;
m_letters.getAt(i,lt);
printf("%c ",lt);
}
printf("/n");
printf("映射表: /n");
printf("%7s"," ");
for(i=0; i
Letter lt;
m_letters.getAt(i,lt);
printf("%7c",lt);
}
printf("/n");
for(i=0; i
printf("%7d",i );
int j=0;
Letter lt;
for(j=0; j<=m_letters.count(); j++)
{
if(j>=m_letters.count())
lt = Letter::s_empty;
else
m_letters.getAt(j,lt);
State s = getMapItem(i)[lt];
if(s > 0)
{
printf("%7d",s);
}
else
{
printf("%7s"," ");
}
}
printf("/n");
}
printf("/n");
}
// Nondeterministic finite automaton (with empty-word transitions). [3/21/2008 win]
// States are numbered 0 .. m_allStates-1 in creation order (see newState).
class NFA
{
public:
NFA();
~NFA();
// Records the transition: state s1, on letter lt, can go to state s2. [3/21/2008 win]
void setMap(const State& s1, const Letter& lt,const State& s2);
// Looks up the transition targets: from one state plus one input letter an
// NFA can reach N states. psCollection is pointed at the stored set;
// returns true when that set is non-empty. [3/21/2008 win]
bool getMap(const State& s1, const Letter& lt, StateCollection*& psCollection)const;
// Builds the NFA recognising a single letter. [3/21/2008 win]
bool initFromLetter(const Letter& lt);
// NFA operations. [3/21/2008 win]
// Concatenation. [3/21/2008 win]
bool CalcAnd(const NFA& nfa);
// Alternation. [3/21/2008 win]
bool CalcOr(const NFA& nfa);
// Closure (Kleene star). [3/21/2008 win]
bool CalcClosure();
// Converts to a deterministic automaton. [3/22/2008 win]
void toDFA(DFA& out);
// Epsilon-closure of a state set: [3/22/2008 win]
// all states reachable from any state of the set through any number of epsilon arcs. [3/22/2008 win]
StateCollection& eClosure(const StateCollection& from, StateCollection& to);
StateCollection& eClosure(StateCollection& from_to);
// All states reachable from any state of the set through one arc labelled lt. [3/22/2008 win]
// Returns true when the resulting set is non-empty. [3/22/2008 win]
bool move(const StateCollection& from, const Letter& lt, StateCollection& to);
// Prints the automaton. [3/22/2008 win]
void print();
StateCollection m_startStates; // set of start states [3/21/2008 win]
StateCollection m_endStates; // set of accepting states [3/21/2008 win]
int m_allStates; // number of states created so far [3/21/2008 win]
LetterCollection m_letters; // alphabet [3/21/2008 win]
private:
// For simplicity only ASCII letters are considered, i.e. 127 of them
// (the 33 control characters could probably be dropped too ^_^). [3/21/2008 win]
// One entry per state; each entry points to a StateCollection[MAX_LETTER_COUNT]
// row indexed by letter (allocated in newState/newStates).
PtArr m_pMap;
// Releases resources. [3/21/2008 win]
void cleanUp();
// Marks s as a start state. [3/21/2008 win]
void setStartState(const State& s);
// Marks s as an accepting state. [3/21/2008 win]
void setEndState(const State& s);
// Creates new state(s); state ids are local to one automaton. [3/21/2008 win]
void newState(State& outS);
void newStates(int count);
};
// Builds an empty automaton by running the shared cleanUp() reset.
// NOTE(review): cleanUp() runs here before m_allStates has been given a
// value - confirm cleanUp() never reads it before assigning it.
NFA::NFA()
{
cleanUp();
}
// Releases everything through cleanUp().
NFA::~NFA()
{
cleanUp();
}
void NFA::cleanUp()
{
for(int i=0; i
StateCollection* pt = (StateCollection*)m_pMap[i];
delete []pt;
}
m_allStates = 0;
m_startStates.clean();
m_endStates.clean();
m_letters.clean();
}
// Creates one new state and returns its id in outS. [3/21/2008 win]
// Ids are only meaningful inside this automaton. The new state gets a fresh
// transition row of empty target sets.
void NFA::newState(State& outS)
{
    outS = State(m_allStates);
    m_allStates += 1;
    StateCollection* const row = new StateCollection[MAX_LETTER_COUNT];
    m_pMap.append(row);
}
// Creates `count` new states at once, each with its own empty transition row.
void NFA::newStates(int count)
{
    m_allStates += count;
    for (int remaining = count; remaining > 0; --remaining)
    {
        m_pMap.append(new StateCollection[MAX_LETTER_COUNT]);
    }
}
void NFA::setMap(const State& s1, const Letter& lt,const State& s2)
{
int idx1 = s1;
int idx2 = s2;
if(idx1<0 || idx2 <0)
{
#ifdef KAVA_DEBUG
printf("ERROR : void NFA::setMapVal(State& s1, Letter& lt, State& s2) : idx1 < 0 || idx2 < 0");
#endif
return ;
}
if(idx1 >= m_allStates || idx2 >= m_allStates )
{
#ifdef KAVA_DEBUG
printf("ERROR : void NFA::setMapVal(State& s1, Letter& lt, State& s2) : idx1 >= m_allStates.count() || idx2 >= m_allStates.count()");
#endif
return ;
}
StateCollection* states = (StateCollection*)m_pMap[idx1];
if(states)
states[lt].add(idx2);
else
printf("ERROR: NFA::setMap(const State& s1, const Letter& lt,const State& s2) : states == NULL");
}
bool NFA::getMap(const State& s1, const Letter& lt, StateCollection*& psCollection)const
{
int idx1 = s1 ;
if(idx1 < 0 || idx1 >= m_allStates)
return false;
StateCollection* states = (StateCollection*)m_pMap[idx1];
if(states)
psCollection = &states[lt];
else
return false;
return (psCollection->count() > 0);
}
// Adds s to the set of start states. [3/21/2008 win]
// Ignored when s is not a state of this automaton.
void NFA::setStartState(const State& s)
{
    const int idx = s;
    const bool known = (idx >= 0) && (idx < m_allStates);
    if (known)
    {
        m_startStates.add(s);
        return ;
    }
#ifdef KAVA_DEBUG
    printf("ERROR : void NFA::setStartState : idx<0 || idx >= m_allStates ");
#endif
}
// Adds s to the set of accepting states. [3/21/2008 win]
// Ignored when s is not a state of this automaton.
void NFA::setEndState(const State& s)
{
    const int idx = s;
    const bool known = (idx >= 0) && (idx < m_allStates);
    if (known)
    {
        m_endStates.add(s);
        return ;
    }
#ifdef KAVA_DEBUG
    printf("ERROR : void NFA::setEndState : idx<0 || idx >= m_allStates ");
#endif
}
// 根据正则表达式创建NFA [3/21/2008 win]
bool NFA::initFromLetter(const Letter& lt)
{
cleanUp();
// 如果是空字 [3/21/2008 win]
if(lt == Letter::s_empty)
{
State s;
this->newState(s);
this->setStartState(s);
this->setEndState(s);
}
else
{
State start, end;
this->newState(start);
this->newState(end);
this->setStartState(start);
this->setEndState(end);
this->m_letters.add(lt);
this->setMap(start,lt,end);
}
return true;
}
// 或运算 [3/21/2008 win]
bool NFA::CalcOr(const NFA& nfa)
{
// nfa1 | nfa2 [3/22/2008 win]
// 现在变成 [3/22/2008 win]
// newStart 经过空字->(原初态) nfa1 (原终态)经过空字 -> newEnd [3/22/2008 win]
// | |
// ——>(原初态) nfa2 (原终态)经过空字 ——
// 取得原来的开始状态和终状态,此计算过程中,开始和终状态只有一个 [3/22/2008 win]
State oldStart1,oldEnd1;
bool valid = this->m_startStates.getAt(0,oldStart1);
valid |= this->m_endStates.getAt(0,oldEnd1);
State oldStart2, oldEnd2;
valid |= nfa.m_startStates.getAt(0,oldStart2);
valid |= nfa.m_endStates.getAt(0,oldEnd2);
if(!valid)
{
#ifdef KAVA_DEBUG
printf("ERROR: bool NFA::CalcAnd : !valid");
#endif
return false;
}
// 原来的开始状态和终状态不再是开始状态和终状态 [3/22/2008 win]
this->m_startStates.clean();
this->m_endStates.clean();
// 创建新的开始状态和终态 [3/22/2008 win]
State newStart, newEnd;
this->newState(newStart);
this->newState(newEnd);
this->setStartState(newStart);
this->setEndState(newEnd);
// 合并字符表 [3/22/2008 win]
this->m_letters.unionC(nfa.m_letters);
// 记录状态偏移 [3/22/2008 win]
// 因为nfa的映射要合入到this中,而nfa的所有状态需要在this中重新分配 [3/22/2008 win]
int stateOffset = this->m_allStates;
// 重新分配nfa中所有的状态 [3/22/2008 win]
this->newStates(nfa.m_allStates);
// 由于重新分配了状态,因此需要更新映射 [3/22/2008 win]
// 比如原来是 经过lt到达状态2,现在就是到达状态2+stateOffset [3/22/2008 win]
for(int i=stateOffset; i
for(int j=0; j
StateCollection* nfaStates = (StateCollection*)nfa.m_pMap[i-stateOffset];
StateCollection* tStates = (StateCollection*)m_pMap[i];
if(!nfaStates || !tStates)
{
#ifdef KAVA_DEBUG
printf("NFA::CalcOr : states == NULL");
#endif
continue;
}
if(nfaStates[j].count() > 0)
{
tStates[j] = nfaStates[j];
tStates[j].setOffset(stateOffset);
}
}
}
// 设置新的开始状态和终态相关的映射 [3/22/2008 win]
// nfa的状态要加上偏移 [3/22/2008 win]
this->setMap(newStart , Letter::s_empty, oldStart1);
this->setMap(newStart , Letter::s_empty, oldStart2+stateOffset);
this->setMap(oldEnd1 , Letter::s_empty, newEnd);
this->setMap(oldEnd2+stateOffset , Letter::s_empty, newEnd);
return true;
}
// 连接运算 [3/21/2008 win]
bool NFA::CalcAnd(const NFA& nfa)
{
// nfa1 nfa2 [3/22/2008 win]
// 现在变成 [3/22/2008 win]
// nfa1 (原终态)经过空字 -> (原初态)nfa2
// 原来的初态不变,终态改为nfa的终态 [3/22/2008 win]
// 取得this的原终态和nfa的原初态 [3/22/2008 win]
State oldEnd1;
bool valid = this->m_endStates.getAt(0,oldEnd1);
State oldStart2,oldEnd2;
valid |= nfa.m_startStates.getAt(0,oldStart2);
valid |= nfa.m_endStates.getAt(0,oldEnd2);
if(!valid)
{
#ifdef KAVA_DEBUG
printf("ERROR: bool NFA::CalcAnd : !valid");
#endif
return false;
}
// 删除原来的终态 [3/22/2008 win]
m_endStates.clean();
// 合并字符表 [3/22/2008 win]
this->m_letters.unionC(nfa.m_letters);
// 记录状态偏移 [3/22/2008 win]
// 因为nfa的映射要合入到this中,而nfa的所有状态需要在this中重新分配 [3/22/2008 win]
int stateOffset = this->m_allStates;
// 重新分配nfa中所有的状态 [3/22/2008 win]
this->newStates(nfa.m_allStates);
// 由于重新分配了状态,因此需要更新映射 [3/22/2008 win]
// 比如原来是 经过lt到达状态2,现在就是到达状态2+stateOffset [3/22/2008 win]
for(int i=stateOffset; i
for(int j=0; j
StateCollection* nfaStates = (StateCollection*)nfa.m_pMap[i-stateOffset];
StateCollection* tStates = (StateCollection*)m_pMap[i];
if(!nfaStates || !tStates)
{
#ifdef KAVA_DEBUG
printf("NFA::CalcAnd : states == NULL");
#endif
continue;
}
if(nfaStates[j].count() > 0)
{
tStates[j] = nfaStates[j];
tStates[j].setOffset(stateOffset);
}
}
}
// 设置新的开始状态和终态相关的映射 [3/22/2008 win]
// nfa的状态要加上偏移 [3/22/2008 win]
this->setMap(oldEnd1 , Letter::s_empty, oldStart2 + stateOffset);
// 设置终态为nfa的终态度(加偏移) [3/22/2008 win]
this->setEndState(oldEnd2+stateOffset);
return true;
}
// Closure: turns this automaton into (this)*. [3/21/2008 win]
//
//   newStart --e--> (old start) this (old end) --e--> newEnd
//   newStart --e--> newEnd                 (zero repetitions)
//   (old end) --e--> (old start)           (one more repetition)
//
// The automaton is expected to have exactly one start and one accepting
// state (position 0 of the respective sets is used).
// Returns false, leaving this automaton unchanged, when either is missing.
bool NFA::CalcClosure()
{
    State oldStart, oldEnd;
    // Both lookups have to succeed, hence && rather than |.
    const bool valid = this->m_startStates.getAt(0, oldStart)
                    && this->m_endStates.getAt(0, oldEnd);
    if (!valid)
    {
#ifdef KAVA_DEBUG
        printf("ERROR: bool NFA::CalcClosure : !valid");
#endif
        return false;
    }

    // The old start/accepting states lose their role. [3/22/2008 win]
    this->m_startStates.clean();
    this->m_endStates.clean();

    // Fresh start and accepting state. [3/22/2008 win]
    State newStart, newEnd;
    this->newState(newStart);
    this->newState(newEnd);
    this->setStartState(newStart);
    this->setEndState(newEnd);

    // Empty-word arcs that implement the repetition. [3/22/2008 win]
    this->setMap(newStart, Letter::s_empty, oldStart);
    this->setMap(newStart, Letter::s_empty, newEnd);
    this->setMap(oldEnd, Letter::s_empty, oldStart);
    this->setMap(oldEnd, Letter::s_empty, newEnd);
    return true;
}
// Epsilon-closure of a state set. [3/22/2008 win]
// Adds to `to` every state of `from` plus every state reachable from them
// through any number of empty-word arcs, and returns `to`.
// `to` is not cleared first; states it already holds are kept.
//
// Implemented as an iterative depth-first walk: each state is marked when it
// is first seen and pushed at most once, so a stack of m_allStates entries
// is always large enough.
StateCollection& NFA::eClosure(const StateCollection& from, StateCollection& to)
{
    const int total = m_allStates;
    bool* visited = new bool[total];
    int* pending = new int[total];
    memset(visited, 0, sizeof(bool) * total);
    int top = 0;

    // Seed the walk with every state of `from`. [3/22/2008 win]
    const int seedCount = from.count();
    for (int i = 0; i < seedCount; i++)
    {
        State seed;
        from.getAt(i, seed);
        to.add(seed);
        const int id = seed;
        if (id >= 0 && id < total && !visited[id])
        {
            visited[id] = true;
            pending[top++] = id;
        }
    }

    // Follow empty-word arcs until no unseen state is left.
    while (top > 0)
    {
        const State cur(pending[--top]);
        StateCollection* targets = 0;
        if (!getMap(cur, Letter::s_empty, targets))
            continue;
        const int targetCount = targets->count();
        for (int k = 0; k < targetCount; k++)
        {
            State s;
            targets->getAt(k, s);
            const int id = s;
            if (id < 0 || id >= total || visited[id])
                continue;
            visited[id] = true;
            to.add(s);
            pending[top++] = id;
        }
    }

    delete[] pending;
    delete[] visited;
    return to;
}
// In-place epsilon-closure: extends from_to with every state reachable from
// its members through any number of empty-word arcs, and returns from_to.
//
// All seeds are collected before anything is inserted. Inserting into the
// sorted set while still walking it by index would shift the elements and
// let some of the original states go unprocessed.
// Each state is marked when first seen and pushed at most once, so a stack
// of m_allStates entries is always large enough.
StateCollection& NFA::eClosure(StateCollection& from_to)
{
    const int total = m_allStates;
    bool* visited = new bool[total];
    int* pending = new int[total];
    memset(visited, 0, sizeof(bool) * total);
    int top = 0;

    // Snapshot the seeds; from_to is not modified in this loop.
    const int seedCount = from_to.count();
    for (int i = 0; i < seedCount; i++)
    {
        State seed;
        from_to.getAt(i, seed);
        const int id = seed;
        if (id >= 0 && id < total && !visited[id])
        {
            visited[id] = true;
            pending[top++] = id;
        }
    }

    // Follow empty-word arcs until no unseen state is left.
    while (top > 0)
    {
        const State cur(pending[--top]);
        StateCollection* targets = 0;
        if (!getMap(cur, Letter::s_empty, targets))
            continue;
        const int targetCount = targets->count();
        for (int k = 0; k < targetCount; k++)
        {
            State s;
            targets->getAt(k, s);
            const int id = s;
            if (id < 0 || id >= total || visited[id])
                continue;
            visited[id] = true;
            from_to.add(s);
            pending[top++] = id;
        }
    }

    delete[] pending;
    delete[] visited;
    return from_to;
}
// 是状态集I中的任何状态S经字符 lt 而能到达的状态的集合 [3/22/2008 win]
bool NFA::move(const StateCollection& from, const Letter& lt, StateCollection& to)
{
// 不是空字 [3/22/2008 win]
if(lt == Letter::s_empty)
return false;
int total= m_allStates ;
bool *pFlag = new bool[total];
memset(pFlag,0,sizeof(bool)*total);
bool empty = true;
State sIt;
int count = from.count();
for(int i=0; i
from.getAt(i,sIt);
StateCollection* pOut=0;
if(this->getMap(sIt,lt,pOut))
{
int count2 = pOut->count();
for(int j=0; j
State s2;
pOut->getAt(j,s2);
if( !pFlag[s2] )
{
pFlag[s2] = 1;
to.add(s2);
empty = false;
}
}
}
}
delete[] pFlag;
return !empty;
}
// Subset construction: builds in `out` a DFA equivalent to this NFA. [3/22/2008 win]
//
// Every DFA state stands for a set of NFA states. DFA state 0 is the
// epsilon-closure of the NFA start states; further sets are discovered by
// taking eClosure(move(set, letter)) for every known set and every letter of
// the alphabet. A DFA state is accepting when its set contains an NFA
// accepting state. `out` is reset before it is filled.
void NFA::toDFA(DFA& out)
{
    // Registry of the state sets found so far.
    // m_arStatesPt[k] is the (owned) set behind DFA state k, in discovery
    // order; m_arIdx lists the same indices sorted by compare() so that a
    // set can be looked up by binary search.
    struct Local
    {
        ~Local()
        {
            for (int i = 0; i < m_arStatesPt.count(); i++)
            {
                StateCollection* pt = static_cast<StateCollection*>(m_arStatesPt[i]);
                delete pt;
            }
        }
        // Fetches the set behind DFA state i; false when i is out of range.
        bool getAt(int i, StateCollection*& pOut)
        {
            if (i < 0 || i >= m_arStatesPt.count())
                return false;
            pOut = static_cast<StateCollection*>(m_arStatesPt[i]);
            return true;
        }
        // Registers pSC. Returns true when the set is new (ownership of pSC
        // is taken), false when an equal set already exists (pSC stays with
        // the caller). idx receives the index of the set either way. [3/22/2008 win]
        bool add(StateCollection* pSC, int& idx)
        {
            const int count = m_arStatesPt.count();
            if (count == 0)
            {
                idx = 0;
                m_arStatesPt.append(pSC);
                m_arIdx.append(idx);
                return true;
            }
            // Binary search over the sorted index for the insert position. [3/22/2008 win]
            int beg = 0, end = count - 1;
            do
            {
                const int mid = (end + beg) / 2;
                const int realMidIdx = m_arIdx[mid];
                const int cmp = compare(*static_cast<StateCollection*>(m_arStatesPt[realMidIdx]), *pSC);
                if (0 == cmp)
                {
                    // Equal set already registered. [3/22/2008 win]
                    idx = realMidIdx;
                    return false;
                }
                else if (cmp < 0)
                    beg = mid + 1;
                else
                    end = mid - 1;
            } while (beg <= end);
            // The new index goes to sorted position beg. [3/21/2008 win]
            m_arIdx.insert(beg, count);
            m_arStatesPt.append(pSC);
            idx = count;
            return true;
        }
        inline int count(){return m_arStatesPt.count();}
    private:
        PtArr m_arStatesPt;
        IntArr m_arIdx;
    };

    // Start set: epsilon-closure of the NFA start states. [3/22/2008 win]
    StateCollection* pStart = new StateCollection;
    this->eClosure(this->m_startStates, *pStart);

    State start;
    out.reset();
    out.m_letters.unionC(m_letters);
    out.newState(start);
    out.setStartState(start);
    // The start state may be accepting as well. [3/22/2008 win]
    if (m_endStates.intersect(*pStart))
        out.setEndState(start);

    Local help;
    int idx;
    help.add(pStart, idx);

    // `it` walks the registry; sets appended during the walk are visited too.
    int it = 0;
    Letter letter;
    const int ltCount = m_letters.count();
    while (it < help.count())
    {
        StateCollection* pSC = 0;
        help.getAt(it, pSC);
        for (int ltIt = 0; ltIt < ltCount; ltIt++)
        {
            m_letters.getAt(ltIt, letter);
            // Compute the move set. [3/22/2008 win]
            StateCollection* pStates = new StateCollection;
            if (this->move(*pSC, letter, *pStates))
            {
                this->eClosure(*pStates);
                // Is this set already known? [3/22/2008 win]
                int newIdx = 0;
                if (help.add(pStates, newIdx))
                {
                    // New set: create the matching DFA state. [3/22/2008 win]
                    State newState;
                    out.newState(newState);
                    // Accepting when it contains an NFA accepting state. [3/22/2008 win]
                    if (m_endStates.intersect(*pStates))
                        out.setEndState(newState);
                }
                else
                {
                    delete pStates;
                }
                // Registry indices double as DFA state ids. [3/22/2008 win]
                out.setMap(it, letter, newIdx);
            }
            else
            {
                delete pStates;
            }
        }
        it++;
    }
}
void NFA::print()
{
int i=0;
printf("NFA 所有状态: ");
for(i=0; i
printf("%d ",i );
}
printf("/n");
printf("初态: ");
for(i=0; i
State s;
m_startStates.getAt(i,s);
printf("%d ",s);
}
printf("/n");
printf("终态: ");
for(i=0; i
State s;
m_endStates.getAt(i,s);
printf("%d ",s);
}
printf("/n");
printf("字符表: ");
for(i=0; i
Letter lt;
m_letters.getAt(i,lt);
printf("%c ",lt);
}
printf("/n");
printf("映射表: /n");
printf("%7s"," ");
for(i=0; i<=m_letters.count(); i++)
{
if(i >= m_letters.count())
printf("%7s","ε");
else
{
Letter lt;
m_letters.getAt(i,lt);
printf("%7c",lt);
}
}
printf("/n");
for(i=0; i
printf("%7d",i );
int j=0;
Letter lt;
for(j=0; j<=m_letters.count(); j++)
{
if(j>=m_letters.count())
lt = Letter::s_empty;
else
m_letters.getAt(j,lt);
StateCollection* tStates = (StateCollection*)m_pMap[i];
if(!tStates)
{
#ifdef KAVA_DEBUG
printf("NFA::CalcAnd : states == NULL");
#endif
continue;
}
int c = tStates[lt].count();
if(c > 0)
{
static char buf[1024];
int ib = 0;
buf[ib++] = '(';
for(int x = 0; x
State s;
tStates[lt].getAt(x,s);
itoa(s,buf+ib,10);
ib+=(int)log10(s)+1;
buf[ib++]=',';
}
buf[ib-1]=')';
buf[ib] = 0;
printf("%7s",buf);
}
else
{
printf("%7s"," ");
}
}
printf("/n");
}
printf("/n");
}
// Demo: builds the NFA for  a a* b c*  from single-letter automata, prints
// it, converts it to a DFA and prints that as well.
int main(int argc, char* argv[])
{
    NFA nfa,nfa2,nfa3,nfa1;

    // Single-letter building blocks.
    nfa.initFromLetter('a');
    nfa1.initFromLetter('a');
    nfa2.initFromLetter('b');
    nfa3.initFromLetter('c');

    // c* and a*
    nfa3.CalcClosure();
    nfa1.CalcClosure();

    // a . a* . b . c*
    nfa.CalcAnd(nfa1);
    nfa.CalcAnd(nfa2);
    nfa.CalcAnd(nfa3);
    nfa.print();

    DFA dfa;
    nfa.toDFA(dfa);
    dfa.print();
    return 0;
}