编译原理:正规式转变成DFA算法

//将正规式转变成NFA
package hjzgg.formal_ceremony_to_dfa; import java.util.ArrayList;
class Edge{ public int u, v; public char key; public Edge(int u, int v, char key) { super(); this.u = u; this.v = v; this.key = key; } @Override public String toString() { return u + "->" + v + " " + key; } @Override public boolean equals(Object arg0) { Edge tmp = (Edge)arg0; return tmp.u==this.u && tmp.v==this.v && tmp.key==this.key; } @Override public int hashCode() { return u+v+key; } } class NFA{ public static final int MAX_NODE = 100; private boolean finalState[] = new boolean[MAX_NODE];//记录每一个节点是否为终态 private String formal_ceremony;//正规式字符串 private int cnt_node=1;//记录节点的个数 private Map<Integer, Integer> endNode = new TreeMap<Integer, Integer>();//每一个开始节点对应的终端节点 private ArrayList<Edge> nodeAl = new ArrayList<Edge>(); private Vector<Pair>[] g = new Vector[MAX_NODE];//NFA图 private Set<Character> st = new TreeSet<Character>();//正规式中出现的字符的集合 public NFA(String formal_ceremony) { super(); this.formal_ceremony = formal_ceremony; } private void addEdge(int u, int v, char ch){ nodeAl.add(new Edge(u, v, ch)); if(g[u] == null) g[u] = new Vector<Pair>(); g[u].add(new Pair(v, ch)); if(ch!='$') st.add(ch); } public boolean kernel_way(int fa, int ld, int rd, boolean isClosure){//fa表示区间的开始点,正规式的区间[ld, rd], isClosure表示这段区间查是否存在闭包 if(ld < 0 || rd >= formal_ceremony.length()){ System.out.println("正规式不正确---发生数组越界!"); return false; } int pre_node = fa; int inBracket = 0;//判断'|'是否在括弧内 for(int i=ld; i<=rd; ++i){ if(formal_ceremony.charAt(i)=='(') ++inBracket; else if(formal_ceremony.charAt(i)==')') --inBracket; else if(formal_ceremony.charAt(i)=='|' && 0==inBracket){ if(!kernel_way(fa, ld, i-1, isClosure)) return false; if(!kernel_way(fa, i+1, rd, isClosure)) return false; return true; } } for(int i=ld; i<=rd; ++i){ if(formal_ceremony.charAt(i)=='('){//又是一个子区间 //寻找和 该 '('相匹配的')' int cntLeftBracket = 0;//统计遍历过程中'('出现的次数,遇到')'减去1 int posRightBracket = -1;//记录相匹配的')'的位置 int posLeftBracket = i; for(int j=i+1; j<=rd; ++j){ if(formal_ceremony.charAt(j)=='(') ++cntLeftBracket; else if(formal_ceremony.charAt(j)==')'){ if(cntLeftBracket == 0){ posRightBracket = j; break; } --cntLeftBracket; } } if(posRightBracket == -1){//出错 System.out.println("正规式出错----括弧不匹配!"); return false; } int nodeFather = 0;//括弧内正则式的开始节点 if(posRightBracket+1 <= rd && formal_ceremony.charAt(posRightBracket+1)=='*'){ i = posRightBracket+1;//过滤掉"()*" addEdge(pre_node, ++cnt_node, '$');//表示这一条边为空 pre_node = cnt_node; nodeFather = cnt_node; addEdge(pre_node, ++cnt_node, '$');//表示这一条边为空 pre_node = cnt_node; //处理()*括弧内的正规式 if(!kernel_way(nodeFather, posLeftBracket+1, posRightBracket-1, true)) return false; } else { nodeFather = pre_node; if(!kernel_way(nodeFather, posLeftBracket+1, posRightBracket-1, false))//对于"(101)", 看成101 return false; i = posRightBracket; } } else {//单个字符 if(formal_ceremony.charAt(i)==')') continue; if(i+1 <= rd && formal_ceremony.charAt(i+1)=='*'){ addEdge(pre_node, ++cnt_node, '$');//表示这一条边为空 pre_node = cnt_node; addEdge(pre_node, pre_node, formal_ceremony.charAt(i)); if(i+1==rd && isClosure) addEdge(pre_node, fa, '$');//表示这一条边为空并且是连接到父亲节点 else{ if(endNode.containsKey(fa)) addEdge(pre_node, endNode.get(fa), '$'); else{ addEdge(pre_node, ++cnt_node, '$');//表示这一条边为空 if(i==rd) endNode.put(fa, cnt_node);//记录非闭包状态下 第一个节点对应的最后一个节点 } } pre_node = cnt_node; ++i;//过滤* } else { if(i==rd && isClosure){//是闭包的情况 addEdge(pre_node, fa, formal_ceremony.charAt(i)); } else{ if(endNode.containsKey(fa)) addEdge(pre_node, endNode.get(fa), formal_ceremony.charAt(i)); else{ addEdge(pre_node, ++cnt_node, formal_ceremony.charAt(i)); if(i==rd) endNode.put(fa, cnt_node);//记录非闭包状态下 第一个节点对应的最后一个节点 } } pre_node = cnt_node; } } } return true; } private void checkFinalState(){//检查哪一个节点是终态 for(int i=1; i<=cnt_node; ++i){ int cc = 0; if(g[i] == null){//表明是终态 finalState[i] = true; continue; } for(int j=0; j<g[i].size(); ++j) if(g[i].elementAt(j).v != i) ++cc; if(cc == 0)//表明是终态 finalState[i] = true; } } public boolean[] getFinalState(){ return finalState; } public Vector<Pair>[] getNFAGraphics(){ if(kernel_way(1, 0, formal_ceremony.length()-1, false)){ // for(Edge e : nodeAl)//打印NFA // System.out.println(e); checkFinalState(); return g; } return null; } public Set<Character> getCharacterSet(){ return st; } public void outputNFA(){ if(kernel_way(1, 0, formal_ceremony.length()-1, false)){ checkFinalState(); for(Edge e : nodeAl) System.out.println(e); } } } /* * 将正规式转换成NFA * */ public class ToNFA { public static void main(String[] args){ String formal_ceremony = "0*(100*)*0*"; // String formal_ceremony = "1(1010*|1(010)*1)*0"; // String formal_ceremony = "1(0|1)*101"; // String formal_ceremony = "0*1*(010)0*1*"; // String formal_ceremony = "(0|1|2)*"; // String formal_ceremony = "0|1"; // String formal_ceremony = "0|1|2|3"; // String formal_ceremony = "(0|1|6)|(2|3)|(4|5)"; // String formal_ceremony = "(0|1)*|(2|3)*"; // String formal_ceremony = "((10)|(01)*|(0|1))"; NFA nfa = new NFA(formal_ceremony); nfa.outputNFA(); } }
//将NFA转变成确定化DFA
package hjzgg.formal_ceremony_to_dfa; import java.util.ArrayList; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedList; import java.util.Queue; import java.util.Set; import java.util.Vector;
class Pair { public int v; public char ch; public Pair(int v, char ch) { super(); this.v = v; this.ch = ch; } } class MyHashSet extends HashSet<Integer>{//重写 set 集合的 hashcode()和equals()方法 private int state; public void setState(int state){ this.state = state; } public int getState(){ return state; } @Override public boolean equals(Object arg0) { MyHashSet tmp = (MyHashSet)arg0; if(tmp.size() != this.size()) return false; Iterator<Integer> it = this.iterator(); while(it.hasNext()){ if(!tmp.contains(it.next())) return false; } return true; } @Override public int hashCode() { int sum = 0; Iterator<Integer> it = this.iterator(); while(it.hasNext()) sum += (((java.lang.Integer)it.next()).intValue()); return sum; } } class DefinedNFA{ private int dfaNode = 0;//defined DFA节点的个数 private boolean[] finalState = null;//表示NFA中哪一个节点是终态 private boolean[] newFinalState = new boolean[NFA.MAX_NODE] ; private Vector<Pair>[] g = null;//NFA 图 private Set<Edge>edgeSet = new HashSet<Edge>(); //标记图中的边是否被访问过 private MyHashSet st = null; //集合,表示每一个子集状态 private Queue<MyHashSet> queue = new LinkedList<MyHashSet>();//存放要执行的子集状态 private Set<MyHashSet> sst = new HashSet<MyHashSet>(); private Set<Character> characterSet = null;//正规式中的字符的集合 private ArrayList<Edge> nodeAl = new ArrayList<Edge>();//NFA边的集合 public DefinedNFA(Vector<Pair>[] g, Set<Character> characterSet, boolean[] finalState) { super(); this.g = g; this.characterSet = characterSet; this.finalState = finalState; } public Set<Character> getCharacterSet(){ return characterSet; } public int getDfaNode(){ return dfaNode; } public boolean[] getNewFinalState(){ return newFinalState; } public ArrayList<Edge> getNodeAl(){ return nodeAl; } private void dfs(int u, char ch){ if(g[u]==null) return ; int len = g[u].size(); for(int i=0; i<len; ++i){ Pair pair = g[u].elementAt(i); Edge edge = new Edge(u, pair.v, pair.ch); if(!edgeSet.contains(edge) && pair.ch==ch){ edgeSet.add(edge); st.add(pair.v); dfs(pair.v, '$'); } } } public void checkIsFinalState(Set<Integer> st, int state){ Iterator<Integer> it = st.iterator(); while(it.hasNext()){ int val = it.next(); if(finalState[val]) newFinalState[state] = true; } } private void initFirstSet(){ edgeSet.clear(); st = new MyHashSet(); st.add(1); st.setState(++dfaNode); dfs(1, '$'); checkIsFinalState(st, dfaNode); sst.add(st); queue.add(st); } private void addEdge(int u, int v, char ch){ nodeAl.add(new Edge(u, v, ch)); } public void ToStateMatrix(){ initFirstSet(); while(!queue.isEmpty()){ MyHashSet myset = queue.poll(); for(Character ch : characterSet){ st = new MyHashSet(); for(Integer i : myset){ edgeSet.clear(); dfs(i, ch); } if(st.size()>0){ if(!sst.contains(st)){ sst.add(st); queue.add(st); st.setState(++dfaNode); checkIsFinalState(st, dfaNode); } else { Iterator<MyHashSet> it = sst.iterator(); while(it.hasNext()){ MyHashSet tmp = it.next(); if(tmp.equals(st)){ st = tmp; break; } } } addEdge(myset.getState(), st.getState(), ch); } } } } public void outputDFA(){ ToStateMatrix();//有状态转换矩阵得到defined NFA for(Edge e : nodeAl) System.out.println(e); } } public class ToDefinedDFA { public static void main(String[] args) { // String formal_ceremony = "((10)|(01)*|(0|1))"; // String formal_ceremony = "(0|1|6)|(2|3)|(4|5)"; // String formal_ceremony = "1(0|1)*101"; String formal_ceremony = "0*(100*)*0*"; NFA nfa = new NFA(formal_ceremony); DefinedNFA definedDFA = new DefinedNFA(nfa.getNFAGraphics(), nfa.getCharacterSet(), nfa.getFinalState()); definedDFA.outputDFA(); } }
//将确定化DFA最小化
package hjzgg.formal_ceremony_to_dfa; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Map.Entry; import java.util.Set;
class MinimumDFA{ private boolean[] newFinalState = null;//由确定化DFA得到 private ArrayList<Edge> nodeAl = null;//由确定化DFA得到 private int dfaNode;//确定化DFA节点的个数 private Set<Character> characterSet = null;//正规式中的字符的集合 private ArrayList<Set<Integer>> setList = new ArrayList<Set<Integer>>(); public MinimumDFA(boolean[] newFinalState, ArrayList<Edge> nodeAl, int dfaNode, Set<Character> characterSet) { super(); this.newFinalState = newFinalState; this.nodeAl = nodeAl; this.dfaNode = dfaNode; this.characterSet = characterSet; } private void init(){//利用分割法将集合分成终态和非终态 Set<Integer> finalStateSet = new HashSet<Integer>(); Set<Integer> NofinalStateSet = new HashSet<Integer>(); for(int i=1; i<=dfaNode; ++i) if(newFinalState[i])//终态 finalStateSet.add(i); else NofinalStateSet.add(i); setList.add(finalStateSet); setList.add(NofinalStateSet); } public void toMinimumDfa(){ init(); boolean flag = true; ArrayList<Set<Integer>> tmpSetList = new ArrayList<Set<Integer>>(); while(flag){ flag = false; hjzgg: for(int k=0; k<setList.size(); ++k){ Set<Integer> st = setList.get(k); if(st.size()<=1) continue; for(Character ch : characterSet){ Map<Integer, Integer> mp = new HashMap<Integer, Integer>(); for(int i=0; i<nodeAl.size(); ++i){//st集合(也就是map的val值)在 ch这个点对应的集合 {st}a = {...} Edge edge = nodeAl.get(i); if(edge.key == ch && st.contains(edge.u)) mp.put(edge.u, edge.v); }

            for(Integer i : st)
              if(!mp.containsKey(i))//表明i节点对应的是一条空边
                mp.put(i, -1);

//将st集合拆分成两个不想交的集合
                    Set<Integer> firstSet = new HashSet<Integer>();
                    Set<Integer> secondSet = new HashSet<Integer>();
                    for(int j=0; j<setList.size(); ++j){
                        firstSet.clear();
                        secondSet.clear();
                        Set<Integer> tmpSt = setList.get(k);
                        for(Entry<Integer, Integer> entry : mp.entrySet()){//返回此映射中包含的映射关系的 set 视图。返回的 set 中的每个元素都是一个 Map.Entry
                              if(tmpSt.contains(entry.getValue()))
                                  firstSet.add(entry.getKey());
                              else secondSet.add(entry.getKey());
                        }
                        if(firstSet.size()!=0 && secondSet.size()!=0){
                            flag = true;//如果发现可以拆分的集合,则继续最顶层的while循环
                            for(Integer i : tmpSt){//将firstSet 和 secondSet中都没有的元素添加到firstSet中
                                if(!firstSet.contains(i) && !secondSet.contains(i))
                                    firstSet.add(i);
                            }
                            setList.remove(k);
                            setList.add(firstSet);
                            setList.add(secondSet);
                            break hjzgg;
                        }
                    }
                }
            }
        }
//        for(int k=0; k<setList.size(); ++k)//输出最终的集合划分
//            System.out.println(setList.get(k));
//        System.out.println("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&");
        for(int k=0; k<setList.size(); ++k){
            Set<Integer> st = setList.get(k);
            if(st.size() > 1){//看成是一个等价的状态,选择第一个元素当作代表
                int first=0;
                for(Integer i : st){//取得第一个元素
                    first = i;
                    break;
                }
                ArrayList<Edge> tmpList = new ArrayList<Edge>();
                for(int i=0; i<nodeAl.size(); ++i){//遍历所有的边,找到不是first
                    Edge edge = nodeAl.get(i);
                    if(st.contains(edge.u) && edge.u!=first){
                        nodeAl.remove(i);
                        --i;
                    } else if(st.contains(edge.v) && edge.v!=first){
                        nodeAl.remove(i);
                        --i;
                        tmpList.add(new Edge(edge.u, first, edge.key));
                    }
                }
                nodeAl.addAll(tmpList);
            }
        }
    }
    
    public void outputMinimumDFA(){
//        for(int i=0; i<nodeAl.size(); ++i)//输出未确定化的DFA
//            System.out.println(nodeAl.get(i));
        toMinimumDfa();
        for(int i=0; i<nodeAl.size(); ++i)
            System.out.println(nodeAl.get(i));
    }
}

public class ToMinimumDFA {

    public static void main(String[] args) {
//        String formal_ceremony = "1(0|1)*101";
        String formal_ceremony = "0*(100*)*0*";
        NFA nfa = new NFA(formal_ceremony);
        DefinedNFA definedDFA = new DefinedNFA(nfa.getNFAGraphics(), nfa.getCharacterSet(), nfa.getFinalState());
        definedDFA.ToStateMatrix();
        MinimumDFA minimumDFA = new MinimumDFA(definedDFA.getNewFinalState(), definedDFA.getNodeAl(), definedDFA.getDfaNode(), definedDFA.getCharacterSet());
        minimumDFA.outputMinimumDFA();
    }

}

 

你可能感兴趣的:(编译原理)