package com.siwanghu.syntaxanalyzer.bean; import java.util.ArrayList; import java.util.List; public class Grammar { private String left; private String right; private List<String> lefts; private List<String> rights; private int id; private static int ID = 0; public Grammar() { super(); id = ID++; lefts = new ArrayList<String>(); rights = new ArrayList<String>(); } public Grammar(String left, String right) { super(); this.left = left; this.right = right; lefts = new ArrayList<String>(); rights = new ArrayList<String>(); id = ID++; } public String getLeft() { return left; } public void setLeft(String left) { this.left = left.replace(" ", ""); } public String getRight() { return right; } public void setRight(String right) { this.right = right.replace(" ", ""); } public int getId() { return id; } public List<String> getLefts() { return lefts; } public void setLefts(List<String> lefts) { this.lefts = lefts; } public List<String> getRights() { return rights; } public void setRights(List<String> rights) { this.rights = rights; } public void createTaken() { this.getLefts().add(this.getLeft()); String right = this.getRight(); if (right.equals("epsilon")) { this.getRights().add(right); } else { while (right.contains("epsilon")) { right = right.replaceFirst("epsilon", "#"); } if (right.length() == 1) { this.getRights().add(right); } else { for (int i = 0; i < right.length(); i++) { if ((i + 1 < right.length()) && String.valueOf(right.charAt(i + 1)).equals("'")) { this.getRights().add(right.charAt(i) + "'"); } else { if (!(String.valueOf(right.charAt(i)).equals("'"))) { if (String.valueOf(right.charAt(i)).equals("#")) { this.getRights().add("epsilon"); } else { this.getRights().add( String.valueOf(right.charAt(i))); } } } } } } } @Override public String toString() { return "Grammar [left=" + left + ", right=" + right + "]"; } }
package com.siwanghu.syntaxanalyzer.bean; public class TableItem { private String nonTerminatingSymbol; private String terminatingSymbol; private Grammar grammar; public String getNonTerminatingSymbol() { return nonTerminatingSymbol; } public TableItem setNonTerminatingSymbol(String nonTerminatingSymbol) { this.nonTerminatingSymbol = nonTerminatingSymbol; return this; } public String getTerminatingSymbol() { return terminatingSymbol; } public TableItem setTerminatingSymbol(String terminatingSymbol) { this.terminatingSymbol = terminatingSymbol; return this; } public Grammar getGrammar() { return grammar; } public TableItem setGrammar(Grammar grammar) { this.grammar = grammar; return this; } public TableItem createTaken(){ grammar.createTaken(); return this; } @Override public String toString() { String temp="\n"; temp+="左部:"+grammar.getLefts().get(0)+"\n"; temp+="右部:"; for(int i=0;i<grammar.getRights().size();i++){ temp+=grammar.getRights().get(i)+" "; } temp+="共计 "+grammar.getRights().size(); return "TableItem [nonTerminatingSymbol=" + nonTerminatingSymbol + ", terminatingSymbol=" + terminatingSymbol + "]"+temp; } } package com.siwanghu.syntaxanalyzer.algorithm; import java.util.ArrayList; import java.util.LinkedList; import java.util.List; import java.util.ListIterator; import com.siwanghu.syntaxanalyzer.bean.Grammar; public class Production { private String begin=""; //开始符号 private List<Grammar> productions = new LinkedList<Grammar>(); // 产生式 private List<Character> symbols = new ArrayList<Character>(); // 初始产生式非终结符 private List<String> nonTerminatingSymbol = new ArrayList<String>(); // LL(1)文法非终结符 private List<String> terminatingSymbol = new ArrayList<String>(); // LL(1)文法终结符 private boolean isLL1 = false; public Production(List<Grammar> productions,String begin) { super(); this.productions = productions; this.begin=begin; symbolProductions(); } public List<Grammar> getProductions() { return productions; } public List<Character> getSymbols() { return symbols; } public List<String> getNonTerminatingSymbol() { return nonTerminatingSymbol; } public List<String> getTerminatingSymbol() { return terminatingSymbol; } public String getBegin(){ return begin; } public void createLL1() { if (productions.size() != 0) { removeLeftRecursion(); isLL1 = true; } } public void removeLeftRecursion() { for (int i = 0; i < symbols.size(); i++) { for (int j = 0; j < i; j++) { iterativeReplacement(symbols.get(i), symbols.get(j)); } removeLeftRecursion(symbols.get(i)); } no_or_is_terminatingSymbol(); } private void symbolProductions() { if (productions.size() != 0) { for (int i = 0; i < productions.size(); i++) { if (!((ArrayList<Character>) symbols).contains(productions .get(i).getLeft().charAt(0))) { symbols.add(productions.get(i).getLeft().charAt(0)); } } } } private void no_or_is_terminatingSymbol() { for (int i = 0; i < productions.size(); i++) { if (!((ArrayList<String>) nonTerminatingSymbol) .contains(productions.get(i).getLeft())) { nonTerminatingSymbol.add(productions.get(i).getLeft()); } if (productions.get(i).getLeft() == productions.get(i).getLeft() .charAt(0) + "'") { nonTerminatingSymbol.add(productions.get(i).getLeft()); } } for (int i = 0; i < productions.size(); i++) { String temp = productions.get(i).getRight(); temp = temp.replace("epsilon", "#"); for (int j = 0; j < nonTerminatingSymbol.size(); j++) { temp = temp.replaceAll(nonTerminatingSymbol.get(j), ""); } temp = temp.replaceAll("\\|", ""); temp = temp.replaceAll("'", ""); char[] chars = temp.toCharArray(); for (int k = 0; k < chars.length; k++) { if (chars[k] == '#') { if (!terminatingSymbol.contains("epsilon")) { terminatingSymbol.add("epsilon"); } } else { if (!terminatingSymbol.contains(String.valueOf(chars[k]))) { terminatingSymbol.add(String.valueOf(chars[k])); } } } } } private void iterativeReplacement(Character left, Character right) { ListIterator<Grammar> listIterator = productions.listIterator(); while (listIterator.hasNext()) { String inRight = ""; Grammar grammar = listIterator.next(); if (grammar.getLeft().equals(left.toString())) { boolean isReplacement = false; String[] rights = grammar.getRight().split("\\|"); for (int i = 0; i < rights.length; i++) { if (rights[i].startsWith(right.toString())) { isReplacement = true; } } if (isReplacement) { ListIterator<Grammar> _listIterator = productions .listIterator(); while (_listIterator.hasNext()) { Grammar _grammar = _listIterator.next(); if (_grammar.getLeft().equals(right.toString())) { String[] _rights = _grammar.getRight().split("\\|"); for (int i = 0; i < rights.length; i++) { boolean isCheck = false; if (rights[i].startsWith(right.toString())) { isCheck = true; for (int j = 0; j < _rights.length; j++) { String temp = rights[i]; inRight += (temp.replaceFirst( right.toString(), _rights[j]) + "|"); } } if (!isCheck) { inRight += (rights[i] + "|"); } } } } if (inRight.length() != 0) { listIterator.remove(); listIterator.add(new Grammar(left.toString(), inRight .substring(0, inRight.length() - 1))); } } } } } private void removeLeftRecursion(Character left) { ListIterator<Grammar> listIterator = productions.listIterator(); while (listIterator.hasNext()) { Grammar grammar = listIterator.next(); if (grammar.getLeft().equals(left.toString())) { String[] rights = grammar.getRight().split("\\|"); boolean isLeftRecursion = false; for (int i = 0; i < rights.length; i++) { if (rights[i].startsWith(left.toString())) { isLeftRecursion = true; } } if (isLeftRecursion) { listIterator.remove(); String oneRight = "", twoRight = ""; for (int i = 0; i < rights.length; i++) { if (!rights[i].startsWith(left.toString())) { oneRight += (rights[i] .concat(left.toString() + "'") + "|"); } else { twoRight += (rights[i].replaceFirst( left.toString(), "").concat( left.toString() + "'") + "|"); } } listIterator.add(new Grammar(left.toString(), oneRight .substring(0, oneRight.length() - 1))); listIterator.add(new Grammar(left.toString() + "'", twoRight.concat("epsilon"))); } } } } public void createTaken() { if (isLL1) { for (Grammar grammar : productions) { grammar.getLefts().add(grammar.getLeft()); String right = grammar.getRight(); if (right.equals("epsilon")) { grammar.getRights().add(right); } else { while (right.contains("epsilon")) { right=right.replaceFirst("epsilon", "#"); } if (right.length() == 1) { grammar.getRights().add(right); } else { for (int i = 0; i < right.length(); i++) { if ((i + 1 < right.length()) && String.valueOf(right.charAt(i + 1)) .equals("'")) { grammar.getRights().add(right.charAt(i) + "'"); } else { if (!(String.valueOf(right.charAt(i)) .equals("'"))) { if (String.valueOf(right.charAt(i)).equals( "#")) { grammar.getRights().add("epsilon"); } else { grammar.getRights() .add(String.valueOf(right .charAt(i))); } } } } } } } } } @Override public String toString() { String temp = "非终结符: "; for (int i = 0; i < nonTerminatingSymbol.size(); i++) { temp += nonTerminatingSymbol.get(i) + " "; } temp += " 共计:" + nonTerminatingSymbol.size(); temp += "\n终结符: "; for (int i = 0; i < terminatingSymbol.size(); i++) { temp += terminatingSymbol.get(i) + " "; } temp += " 共计:" + terminatingSymbol.size(); temp += "\n消除左递归后的文法:\n"; for (int i = 0; i < productions.size(); i++) { temp += (productions.get(i) + "\n"); } return temp; } } package com.siwanghu.syntaxanalyzer.algorithm; import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import com.siwanghu.syntaxanalyzer.bean.Grammar; import com.siwanghu.syntaxanalyzer.bean.TableItem; public class AnalysisTable { private Production production; private Map<String, List<String>> firstMap; private Map<String, List<String>> followMap; private List<TableItem> analysisTable; public AnalysisTable(Production production) { super(); this.production = production; firstMap = new LinkedHashMap<String, List<String>>(); followMap = new LinkedHashMap<String, List<String>>(); } public void createAnalysisTable() { if (production.getProductions().size() != 0) { production.createTaken(); calculateFirstSet(); calculateFollowSet(); createTable(); } } private void initFollowSet() { for (String symbol : production.getNonTerminatingSymbol()) { List<String> list = new ArrayList<String>(); list.add("#"); followMap.put(symbol, list); } } private void calculateFollowSet() { initFollowSet(); int count = 100; for (int i = 0, j = 0; i < production.getNonTerminatingSymbol().size() && j < count; i = (i + 1) % (production.getNonTerminatingSymbol().size()), j++) { String symbol = production.getNonTerminatingSymbol().get(i); for (Grammar grammar : production.getProductions()) { if (grammar.getRight().contains(symbol)) { for (int k = 0; k < grammar.getRights().size(); k++) { if (grammar.getRights().get(k).equals(symbol)) { // 从此开始 boolean canDo = false, meDo = false; for (int n = k; n < grammar.getRights().size() && !(grammar.getRights().get(n).equals("|")) && !canDo; n++) { if (n + 1 < grammar.getRights().size()) { String rightString = grammar.getRights() .get(n + 1); if (production.getNonTerminatingSymbol() .contains(rightString)) { if (!canDo && !meDo) { meDo = true; List<String> leftFirst = firstMap .get(rightString); List<String> symbolFollow = followMap .get(symbol); for (int m = 0; m < leftFirst .size(); m++) { if (!(symbolFollow .contains(leftFirst .get(m))) ) { if (!(leftFirst.get(m) .equals("epsilon"))) if (!(leftFirst.get(m) .equals("|"))) symbolFollow .add(leftFirst .get(m)); } } followMap.put(symbol, symbolFollow); } } else { List<String> symbolFollow = followMap .get(symbol); if (!(symbolFollow .contains(rightString)) && !(rightString.equals("|"))) { symbolFollow.add(rightString); followMap.put(symbol, symbolFollow); canDo = true; } } } } if (k == grammar.getRights().size() - 1 || grammar.getRights().get(k + 1) .equals("|") && !canDo) { String leftSymbol = grammar.getLeft(); List<String> leftFollow = followMap .get(leftSymbol); List<String> symbolFollow = followMap .get(symbol); for (int m = 0; m < leftFollow.size(); m++) { if (!(symbolFollow.contains(leftFollow .get(m)))) { if (!(leftFollow.get(m) .equals("epsilon"))) if (!(leftFollow.get(m).equals("|"))) symbolFollow.add(leftFollow .get(m)); } } followMap.put(symbol, symbolFollow); canDo = true; } else { int nonTerminatingSymbol = 0; int isepsilon = 0; boolean isMe = false; for (int n = k; n < grammar.getRights().size() && !(grammar.getRights().get(n) .equals("|")); n++) { if (n + 1 < grammar.getRights().size()) { String rightString = grammar .getRights().get(n + 1); if (production.getTerminatingSymbol() .contains(rightString)) { isMe = true; } if (production .getNonTerminatingSymbol() .contains(rightString)) { nonTerminatingSymbol++; if (hasepsilon(rightString)) { isepsilon++; } } } } if (nonTerminatingSymbol == isepsilon && !canDo && !isMe) { String leftSymbol = grammar.getLeft(); List<String> leftFollow = followMap .get(leftSymbol); List<String> symbolFollow = followMap .get(symbol); for (int m = 0; m < leftFollow.size(); m++) { if (!(symbolFollow.contains(leftFollow .get(m)))) { if (!(leftFollow.get(m) .equals("epsilon"))) if (!(leftFollow.get(m) .equals("|"))) symbolFollow.add(leftFollow .get(m)); } } followMap.put(symbol, symbolFollow); } } } } } } } } private boolean hasepsilon(String symbol) { for (Grammar grammar : production.getProductions()) { if (grammar.getLeft().equals(symbol)) { int index = grammar.getRights().indexOf("epsilon"); if (index < 0) { return false; } else { if (grammar.getRights().size() == 1 && grammar.getRights().contains("epsilon")) { return true; } else { if (index == grammar.getRights().size() - 1 && grammar.getRights().get(index - 1) .equals("|")) { return true; } else { if (index + 1 < grammar.getRights().size() && grammar.getRights().get(index + 1) .equals("|") && index == 0) { return true; } else { if (index + 1 < grammar.getRights().size() && grammar.getRights().get(index + 1) .equals("|") && index - 1 > 0 && grammar.getRights().get(index - 1) .equals("|")) { return true; } } } } } } } return false; } private void calculateFirstSet() { for (String symbol : production.getNonTerminatingSymbol()) { List<String> listFirst = new ArrayList<String>(); iterativeToFirst(symbol, listFirst); } } private void iterativeToFirst(String symbol, List<String> listFirst) { for (Grammar grammar : production.getProductions()) { if (grammar.getLeft().equals(symbol)) { String[] rights = grammar.getRight().split("\\|"); for (String right : rights) { if (!isStartWithSymbol(right)) { listFirst.add(getStartWithSymbol(right)); } else { iterativeToFirst(getStartWithSymbol(right), listFirst); } } } } firstMap.put(symbol, listFirst); } private boolean isStartWithSymbol(String symbol) { for (String nonTerminatingSymbol : production.getNonTerminatingSymbol()) { if (symbol.startsWith(nonTerminatingSymbol)) { return true; } } return false; } private String getStartWithSymbol(String symbol) { for (String nonTerminatingSymbol : production.getNonTerminatingSymbol()) { if (symbol.startsWith(nonTerminatingSymbol)) { return nonTerminatingSymbol; } } if (symbol.equals("epsilon")) { return "epsilon"; } else { return String.valueOf(symbol.charAt(0)); } } private void createTable() { analysisTable = new ArrayList<TableItem>(); for (Map.Entry<String, List<String>> entry : firstMap.entrySet()) { String key = entry.getKey(); List<String> values = entry.getValue(); Grammar _grammar = null; for (Grammar grammar : production.getProductions()) { if (grammar.getLeft().equals(key)) { _grammar = grammar; } } String[] rights = _grammar.getRight().split("\\|"); for (int i = 0; i < values.size(); i++) { if (values.get(i).equals("epsilon")) { List<String> followList = followMap.get(key); for (String follow : followList) { TableItem tableItem = new TableItem(); tableItem.setNonTerminatingSymbol(key) .setTerminatingSymbol(follow) .setGrammar(new Grammar(key, "epsilon")) .createTaken(); if (!analysisTable.contains(tableItem)) { analysisTable.add(tableItem); } } } else { TableItem tableItem = new TableItem(); tableItem .setNonTerminatingSymbol(key) .setTerminatingSymbol(values.get(i)) .setGrammar( new Grammar(key, getRight(rights, values.get(i)))).createTaken(); if (!analysisTable.contains(tableItem)) { analysisTable.add(tableItem); } } } } } private String getRight(String[] rights, String right) { for (int i = 0; i < rights.length; i++) { if (rights[i].startsWith(right)) { return rights[i]; } } for (int i = 0; i < rights.length; i++) { for (int j = 0; j < production.getNonTerminatingSymbol().size(); j++) { if (rights[i].startsWith(production.getTerminatingSymbol().get( j))) { return rights[i]; } } } return rights[0]; } public String getBegin(){ return production.getBegin(); } public List<TableItem> getAnalysisTable(){ return analysisTable; } public Production getProduction(){ return production; } public void print(){ for(int i=0;i<analysisTable.size();i++){ TableItem tableItem=analysisTable.get(i); System.out.println(tableItem.getNonTerminatingSymbol()+" "+tableItem.getTerminatingSymbol()); System.out.println(tableItem.getGrammar()); } } @Override public String toString() { String temp = ""; for (Grammar grammar : production.getProductions()) { System.out.println(grammar); System.out .println("左部:" + grammar.getLefts().get(0) + "\n" + "右部:"); for (int i = 0; i < grammar.getRights().size(); i++) { System.out.print(grammar.getRights().get(i) + " "); } System.out.print(" 个数:" + grammar.getRights().size() + "\n"); } temp += "\nFirst集:\n"; for (Map.Entry<String, List<String>> entry : firstMap.entrySet()) { temp += "First(" + entry.getKey() + ")" + "={"; List<String> firstList = entry.getValue(); for (String first : firstList) { temp += first + " "; } temp += "}\n"; } temp += "\nFollow集:\n"; for (Map.Entry<String, List<String>> entry : followMap.entrySet()) { temp += "Follow(" + entry.getKey() + ")" + "={"; List<String> followList = entry.getValue(); for (String follow : followList) { temp += follow + " "; } temp += "}\n"; } String table=""; for(int i=0;i<analysisTable.size();i++){ table+=analysisTable.get(i).toString(); } return temp+table; } } package com.siwanghu.syntaxanalyzer.algorithm; import java.util.List; import java.util.Stack; import com.siwanghu.syntaxanalyzer.bean.Grammar; import com.siwanghu.syntaxanalyzer.bean.TableItem; public class SyntaxAnalyzer { private AnalysisTable analysisTable; private Stack<String> stack = new Stack<String>(); public SyntaxAnalyzer(AnalysisTable analysisTable) { super(); this.analysisTable = analysisTable; } public boolean syntaxAnalyer(String syntax) { char[] syntaxs = (syntax + "#").replaceAll(" ", "").toCharArray(); int index = 0; stack.clear(); stack.push("#"); stack.push(analysisTable.getBegin()); while (!(stack.peek().equals("#"))) { if (stack.peek().equals(String.valueOf(syntaxs[index]))) { index++; stack.pop(); } else { TableItem tableItem = getTableItem(stack.peek(), String.valueOf(syntaxs[index])); if (tableItem == null) { throw new RuntimeException("输入的句子语法错误!"); } else { List<String> nextList = tableItem.getGrammar().getRights(); if (nextList.size() == 1 && isterminatingSymbol(nextList.get(0))) { stack.pop(); index++; } else { if (nextList.size() == 1 && isEpsilon(nextList.get(0))) { stack.pop(); } else { stack.pop(); for (int i = nextList.size() - 1; i >= 0; i--) { stack.push(nextList.get(i)); } } } } } } return true; } private boolean isEpsilon(String symbol) { if (symbol.equals("epsilon")) return true; else { return false; } } private boolean isterminatingSymbol(String symbol) { if (analysisTable.getProduction().getTerminatingSymbol() .contains(symbol)&&!isEpsilon(symbol)) return true; else { return false; } } private TableItem getTableItem(String symbol, String syntax) { for (TableItem tableItem : analysisTable.getAnalysisTable()) { if (tableItem.getNonTerminatingSymbol().equals(symbol) && tableItem.getTerminatingSymbol().equals(syntax)) { return tableItem; } } return null; } } package com.siwanghu.syntaxanalyzer.test; import java.util.LinkedList; import java.util.List; import com.siwanghu.syntaxanalyzer.algorithm.AnalysisTable; import com.siwanghu.syntaxanalyzer.algorithm.Production; import com.siwanghu.syntaxanalyzer.algorithm.SyntaxAnalyzer; import com.siwanghu.syntaxanalyzer.bean.Grammar; public class Test { public static void main(String[] args) { /* * System.out.println("The Productions of G"); Grammar g1 = new * Grammar("S", "Qc|c"); Grammar g2 = new Grammar("Q", "Rb|b"); Grammar * g3 = new Grammar("R", "Sa|a"); * * List<Grammar> g_productions = new LinkedList<Grammar>(); * g_productions.add(g3); g_productions.add(g2); g_productions.add(g1); * * Production g_production = new Production(g_productions); * g_production.createLL1(); System.out.print(g_production); * System.out.println("end G\n"); * * AnalysisTable g_analysisTable=new AnalysisTable(g_production); * g_analysisTable.createAnalysisTable(); * System.out.println(g_analysisTable); */ Grammar h1 = new Grammar("E", "E+T|T"); Grammar h2 = new Grammar("T", "T*F|F"); Grammar h3 = new Grammar("F", "(E)|i"); List<Grammar> h_productions = new LinkedList<Grammar>(); h_productions.add(h1); h_productions.add(h2); h_productions.add(h3); Production h_production = new Production(h_productions, "E"); h_production.createLL1(); AnalysisTable h_analysisTable = new AnalysisTable(h_production); h_analysisTable.createAnalysisTable(); SyntaxAnalyzer syntaxAnalyzer = new SyntaxAnalyzer(h_analysisTable); try { if (syntaxAnalyzer.syntaxAnalyer("i*i+i")) { System.out.println("语法正确!"); } } catch (Exception e) { System.out.println(e.getMessage()); } try { if (syntaxAnalyzer.syntaxAnalyer("(i*i)+(i+i)")) { System.out.println("语法正确!"); } } catch (Exception e) { System.out.println(e.getMessage()); } try { if (syntaxAnalyzer.syntaxAnalyer("(i*i)(+(i+i)")) { System.out.println("语法正确!"); } } catch (Exception e) { System.out.println(e.getMessage()); } } }