我们来看看rulelist,它是整个ABNF文法的入口,就是说一个ABNF文法就是一个规则列表rulelist。一个rulelist由若干个rule规则组成,每个rule由规则名rulename、定义方式define-as和元素elements构成。
先来看解析代码:
/* This file is one of the component a Context-free Grammar Parser Generator, which accept a piece of text as the input, and generates a parser for the inputted context-free grammar. Copyright (C) 2013, Junbiao Pan (Email: [email protected]) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ public class AbnfParser { // rulelist = 1*( rule / (*c-wsp c-nl) ) protected List<Rule> rulelist() throws IOException, MatchException, CollisionException { Map<RuleName, Rule> ruleMap = new HashMap<RuleName, Rule>(); List<Rule> ruleList = new ArrayList<Rule>(); // 如果前向字符是字母、空格、分号、回车,则认为是rule、c-wsp或者c-nl while (match(is.peek(), 0x41, 0x5A) || match(is.peek(), 0x61, 0x7A) || match(is.peek(), 0x20) || match(is.peek(), ';') || match(is.peek(), 0x0D)) { // 如果是字母开头,则认为是rule,否则是c-wsp或者c-nl if (match(is.peek(), 0x41, 0x5A) || match(is.peek(), 0x61, 0x7A)) { // 解析一条规则 Rule rule = rule(); // 判断该条规则是否已经有有定义 if (null == ruleMap.get(rule.getRuleName())) { // 如果没有定义则放入规则列表 ruleMap.put(rule.getRuleName(), rule); ruleList.add(rule); } else { // 已有定义,则检查定义方式是否为增量定义 Rule defined = ruleMap.get(rule.getRuleName()); if ("=".equals(rule.getDefinedAs()) && "=".equals(defined.getDefinedAs())) { // 如果不是增量定义,则抛出重复定义异常 throw new CollisionException(rule.getRuleName().toString() + " is redefined.", is.getPos(), is.getLine()); } // 如果是增量定义则合并两条规则 if ("=".equals(rule.getDefinedAs())) defined.setDefinedAs("="); defined.getElements().getAlternation().getConcatenations().addAll(rule.getElements().getAlternation().getConcatenations()); } } else { // 空格、分号、回车,则是c_wsp while (match(is.peek(), 0x20) || match(is.peek(), ';') || match(is.peek(), 0x0D)) { c_wsp(); } c_nl(); } } return ruleList; } // rulename = ALPHA *(ALPHA / DIGIT / "-") protected RuleName rulename() throws IOException, MatchException { // ALPHA = %x41-5A / %x61-7A ; A-Z / a-z // DIGIT = %x30-39 // 规则名的第一个字符必须是字母 if (!(match(is.peek(), 0x41, 0x5A) || match(is.peek(), 0x61, 0x7A))) { throw new MatchException("'A'-'Z'/'a'-'z'", is.peek(), is.getPos(), is.getLine()); } String rulename = ""; rulename += (char)is.read(); // 规则名的后续字符可以是字母、数字、破折号 while (match(is.peek(), 0x41, 0x5A) || match(is.peek(), 0x61, 0x7A) || match(is.peek(), 0x30, 0x39) |match(is.peek(), '-')) { rulename += (char)is.read(); } return new RuleName(prefix, rulename); } // defined-as = *c-wsp ("=" / "=/") *c-wsp protected String defined_as() throws IOException, MatchException { String value = ""; // 等号前面的空格 while (match(is.peek(), 0x20) || match(is.peek(), 0x09) || match(is.peek(), ';') || match(is.peek(), (char)0x0D)) { c_wsp(); } // 等号 assertMatch(is.peek(), '='); value = String.valueOf((char)is.read()); // 是否增量定义 if (match(is.peek(), '/')) { value += (char)is.read(); } // 等号后面的空格 while (match(is.peek(), 0x20) || match(is.peek(), 0x09) || match(is.peek(), ';') || match(is.peek(), (char)0x0D)) { c_wsp(); } return value; } // elements = alternation *c-wsp protected Elements elements() throws IOException, MatchException { // 元素elements其实就是alternation再接着若干空格 Alternation alternation = alternation(); while (match(is.peek(), 0x20) || match(is.peek(), 0x09) || match(is.peek(), ';')) { c_wsp(); } return new Elements(alternation); } }
单元测试就不罗嗦了,直接看代码。
/* This file is one of the component a Context-free Grammar Parser Generator, which accept a piece of text as the input, and generates a parser for the inputted context-free grammar. Copyright (C) 2013, Junbiao Pan (Email: [email protected]) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ // rulename = ALPHA *(ALPHA / DIGIT / "-") @Test public void testRulename() throws Exception { Tester<String> tester = new Tester<String>() { public String test(AbnfParser parser) throws MatchException, IOException { return parser.rulename().toString(); } }; // 用各种奇怪的情形来虐 Assert.assertEquals("A", AbnfParserFactory.newInstance("A").rulename().toString()); Assert.assertEquals("a", AbnfParserFactory.newInstance("a").rulename().toString()); Assert.assertEquals("Z", AbnfParserFactory.newInstance("Z").rulename().toString()); Assert.assertEquals("z", AbnfParserFactory.newInstance("z").rulename().toString()); Assert.assertEquals("ABCDEFGHIJKLMNOPQRSTUVWXYZ", AbnfParserFactory.newInstance("ABCDEFGHIJKLMNOPQRSTUVWXYZ").rulename().toString()); Assert.assertEquals("abcdefghijklmnopqrstuvwxyz", AbnfParserFactory.newInstance("abcdefghijklmnopqrstuvwxyz").rulename().toString()); Assert.assertEquals("AbCdEfGhIjKlMnOpQrStUvWxYz", AbnfParserFactory.newInstance("AbCdEfGhIjKlMnOpQrStUvWxYz").rulename().toString()); Assert.assertEquals("aBcDeFgHiJkLmNoPqRsTuVwXyZ", AbnfParserFactory.newInstance("aBcDeFgHiJkLmNoPqRsTuVwXyZ").rulename().toString()); Assert.assertEquals("A1234567890Z", AbnfParserFactory.newInstance("A1234567890Z").rulename().toString()); Assert.assertEquals("a1234567890z", AbnfParserFactory.newInstance("a1234567890z").rulename().toString()); Assert.assertEquals("A1B2C3D4e5f6g7h8i9j0", AbnfParserFactory.newInstance("A1B2C3D4e5f6g7h8i9j0").rulename().toString()); Assert.assertEquals("A1B2C3D4e5f6g7h8i9j0aBcDeFgHiJkLmNoPqRsTuVwXyZAbCdEfGhIjKlMnOpQrStUvWxYz", AbnfParserFactory.newInstance("A1B2C3D4e5f6g7h8i9j0aBcDeFgHiJkLmNoPqRsTuVwXyZAbCdEfGhIjKlMnOpQrStUvWxYz").rulename().toString()); Assert.assertEquals("A1b2C3-4d-", AbnfParserFactory.newInstance("A1b2C3-4d-").rulename().toString()); Assert.assertEquals("a-------------", AbnfParserFactory.newInstance("a-------------").rulename().toString()); Assert.assertEquals("A1b2C3-4d-", AbnfParserFactory.newInstance("A1b2C3-4d-#").rulename().toString()); Assert.assertEquals("A1b2C3-4d-", AbnfParserFactory.newInstance("A1b2C3-4d-..").rulename().toString()); Assert.assertEquals("RFC3261-A1b2C3-4d-", AbnfParserFactory.newInstance("RFC3261-", "A1b2C3-4d-*&^").rulename().toString()); Assertion.assertMatchException("1", tester, 1, 1); Assertion.assertMatchException("1234567890", tester, 1, 1); Assertion.assertMatchException("-", tester, 1, 1); Assertion.assertMatchException("-----------", tester, 1, 1); Assertion.assertMatchException("#", tester, 1, 1); Assertion.assertMatchException(".", tester, 1, 1); Assertion.assertMatchException("~", tester, 1, 1); Assertion.assertMatchException(")", tester, 1, 1); Assertion.assertMatchException("", tester, 1, 1); } // rulelist = 1*( rule / (*c-wsp c-nl) ) @Test public void testRulelist() throws Exception { Tester<List<Rule>> tester = new Tester<List<Rule>>() { @Override public List<Rule> test(AbnfParser parser) throws MatchException, IOException, CollisionException { return parser.rulelist(); } }; String input, rulesInput = ""; input = "a=b" + (char)0x0d + (char)0x0a; rulesInput += input; List<Rule> rules = new ArrayList<Rule>(); rules.add(AbnfParserFactory.newInstance(input).rule()); input = "b=*c" + (char)0x0d + (char)0x0a; rulesInput += input; rules.add(AbnfParserFactory.newInstance(input).rule()); input = "c=[d]" + (char)0x0d + (char)0x0a; rulesInput += input; rules.add(AbnfParserFactory.newInstance(input).rule()); input = "d=a/b/c/e" + (char)0x0d + (char)0x0a; rulesInput += input; rules.add(AbnfParserFactory.newInstance(input).rule()); input = "e=f/(g h)" + (char)0x0d + (char)0x0a; rulesInput += input; rules.add(AbnfParserFactory.newInstance(input).rule()); input = "g=i [j]" + (char)0x0d + (char)0x0a; rulesInput += input; rules.add(AbnfParserFactory.newInstance(input).rule()); input = "j=<abcd#1234>" + (char)0x0d + (char)0x0a; rulesInput += input; rules.add(AbnfParserFactory.newInstance(input).rule()); input = "j=/\"abcd#1234\"" + (char)0x0d + (char)0x0a; rulesInput += input; rules.add(AbnfParserFactory.newInstance(input).rule()); Assertion.assertMatch(rulesInput, tester, AbnfParserFactory.newInstance(rulesInput).rulelist(), 1, 9); } // rule = rulename defined-as elements c-nl @Test public void testRule() throws Exception { Tester<Rule> tester = new Tester<Rule>() { @Override public Rule test(AbnfParser parser) throws MatchException, IOException { return parser.rule(); } }; String input; Elements elements; elements = AbnfParserFactory.newInstance("b").elements(); Assertion.assertMatch("a=b" + (char)0x0D + (char)0x0A, tester, new Rule(new RuleName("a"), "=", elements), 1, 2); Assertion.assertMatch("a=/b" + (char)0x0D + (char)0x0A, tester, new Rule(new RuleName("a"), "=/", elements), 1, 2); elements = AbnfParserFactory.newInstance("b/c").elements(); Assertion.assertMatch("a=b/c" + (char)0x0D + (char)0x0A, tester, new Rule(new RuleName("a"), "=", elements), 1, 2); Assertion.assertMatch("a=/b/c" + (char)0x0D + (char)0x0A, tester, new Rule(new RuleName("a"), "=/", elements), 1, 2); elements = AbnfParserFactory.newInstance("b c d").elements(); Assertion.assertMatch("a=b c d" + (char)0x0D + (char)0x0A, tester, new Rule(new RuleName("a"), "=", elements), 1, 2); Assertion.assertMatch("a=/b c d" + (char)0x0D + (char)0x0A, tester, new Rule(new RuleName("a"), "=/", elements), 1, 2); elements = AbnfParserFactory.newInstance("[b]").elements(); Assertion.assertMatch("a=[b]" + (char)0x0D + (char)0x0A, tester, new Rule(new RuleName("a"), "=", elements), 1, 2); Assertion.assertMatch("a=/[b]" + (char)0x0D + (char)0x0A, tester, new Rule(new RuleName("a"), "=/", elements), 1, 2); elements = AbnfParserFactory.newInstance("*b").elements(); Assertion.assertMatch("a=*b" + (char)0x0D + (char)0x0A, tester, new Rule(new RuleName("a"), "=", elements), 1, 2); Assertion.assertMatch("a=/*b" + (char)0x0D + (char)0x0A, tester, new Rule(new RuleName("a"), "=/", elements), 1, 2); } //WSP = SP / HTAB // VCHAR = %x21-7E // comment = ";" *(WSP / VCHAR) CRLF // c-nl = comment / CRLF // c-wsp = WSP / (c-nl WSP) // defined-as = *c-wsp ("=" / "=/") *c-wsp @Test public void testDefined_as() throws Exception { Tester<String> tester = new Tester() { public String test(AbnfParser parser) throws MatchException, IOException { return parser.defined_as(); } }; Assertion.assertMatchException("", tester, 1, 1); Assertion.assertMatch("=", tester, "=", 2, 1); Assertion.assertMatchException("/", tester, 1, 1); Assertion.assertMatch("=A", tester, "=", 2, 1); Assertion.assertMatchException("A=", tester, 1, 1); Assertion.assertMatch("=/", tester, "=/", 3, 1); Assertion.assertMatchException(".=/", tester, 1, 1); Assertion.assertMatch("=/#", tester, "=/", 3, 1); Assertion.assertMatchException("#=/", tester, 1, 1); Assertion.assertMatch(" = ", tester, "=", 10, 1); Assertion.assertMatch(" = =", tester, "=", 10, 1); Assertion.assertMatch(" =/ ", tester, "=/", 11, 1); Assertion.assertMatch(" =/ 3", tester, "=/", 11, 1); Assertion.assertMatch("" + (char)0x0D + (char)0x0A +" " + "=", tester, "=", 3, 2); Assertion.assertMatchException("" + (char)0x0D + (char)0x0A + "=", tester, 1, 2); Assertion.assertMatch("" + (char) 0x0D + (char) 0x0A + " " + "==", tester, 3, 2); // Can not handle following case // TODO System.out.println("***************************"); Assertion.assertMatchException("" + (char) 0x0D + (char) 0x0A + " " + "=;", tester, 4, 2); Assertion.assertMatch("" + (char)0x0D + (char)0x0A +" " + "=" + (char)0x0D + (char)0x0A +" ", tester, "=", 2, 3); // Can not handle following case // TODO Assertion.assertMatchException("" + (char) 0x0D + (char) 0x0A + " " + "=" + (char) 0x0D + (char) 0x0A, tester, 1, 3); Assertion.assertMatch("" + (char)0x0D + (char)0x0A +" " + "=" + (char)0x0D + (char)0x0A +" =", tester, "=", 2, 3); Assertion.assertMatch("" + (char)0x0D + (char)0x0A +" " + "=/", tester, "=/", 4, 2); Assertion.assertMatch("" + (char) 0x0D + (char) 0x0A + " " + "=//", tester, "=/", 4, 2); Assertion.assertMatch("" + (char) 0x0D + (char) 0x0A + " " + "=/" + (char) 0x0D + (char) 0x0A + " ", tester, "=/", 2, 3); Assertion.assertMatch("" + (char) 0x0D + (char) 0x0A + " " + "=/" + (char) 0x0D + (char) 0x0A + " ==", tester, "=/", 2, 3); Assertion.assertMatch("" + (char)0x0D + (char)0x0A +(char)0x09 + "=", tester, "=", 3, 2); Assertion.assertMatch("" + (char)0x0D + (char)0x0A +(char)0x09 + "====", tester, 3, 2); Assertion.assertMatch("" + (char)0x0D + (char)0x0A +(char)0x09 + "=" + (char)0x0D + (char)0x0A +(char)0x09, tester, "=", 2, 3); Assertion.assertMatch("" + (char)0x0D + (char)0x0A +(char)0x09 + "=" + (char)0x0D + (char)0x0A +(char)0x09 + "=", tester, "=", 2, 3); Assertion.assertMatch("" + (char)0x0D + (char)0x0A +(char)0x09 + "=/", tester, "=/", 4, 2); Assertion.assertMatch("" + (char)0x0D + (char)0x0A +(char)0x09 + "=/=/", tester, "=/", 4, 2); Assertion.assertMatch("" + (char)0x0D + (char)0x0A +(char)0x09 + "=/" + (char)0x0D + (char)0x0A +(char)0x09, tester, "=/", 2, 3); Assertion.assertMatch("" + (char)0x0D + (char)0x0A +(char)0x09 + "=/" + (char)0x0D + (char)0x0A +(char)0x09 + "/=/=/", tester, "=/", 2, 3); Assertion.assertMatch("" + (char)0x0D + (char)0x0A +(char)0x09 + (char)0x0D + (char)0x0A +(char)0x09 + "=", tester, "=", 3, 3); Assertion.assertMatch("" + (char)0x0D + (char)0x0A +(char)0x09 + (char)0x0D + (char)0x0A +(char)0x09 + "==/=/=/", tester, "=", 3, 3); Assertion.assertMatch("" + (char)0x0D + (char)0x0A +(char)0x09 + (char)0x0D + (char)0x0A +(char)0x09 + "=" + (char)0x0D + (char)0x0A +(char)0x09 + (char)0x0D + (char)0x0A +(char)0x09, tester, "=", 2, 5); Assertion.assertMatch("" + (char)0x0D + (char)0x0A +(char)0x09 + (char)0x0D + (char)0x0A +(char)0x09 + "=" + (char)0x0D + (char)0x0A +(char)0x09 + (char)0x0D + (char)0x0A +(char)0x09 + "/", tester, "=", 2, 5); Assertion.assertMatch("" + (char) 0x0D + (char) 0x0A + (char) 0x09 + (char) 0x0D + (char) 0x0A + (char) 0x09 + "=/", tester, "=/", 4, 3); Assertion.assertMatch("" + (char) 0x0D + (char) 0x0A + (char) 0x09 + (char) 0x0D + (char) 0x0A + (char) 0x09 + "=/=", tester, "=/", 4, 3); Assertion.assertMatch("" + (char) 0x0D + (char) 0x0A + (char) 0x09 + (char) 0x0D + (char) 0x0A + (char) 0x09 + "=/" + (char) 0x0D + (char) 0x0A + (char) 0x09 + (char) 0x0D + (char) 0x0A + (char) 0x09, tester, "=/", 2, 5); // Can not handle following case // TODO Assertion.assertMatchException( "" + (char) 0x0D + (char) 0x0A + (char) 0x09 + (char) 0x0D + (char) 0x0A + (char) 0x09 + "=/" + (char) 0x0D + (char) 0x0A + (char) 0x09 + (char) 0x0D + (char) 0x0A, tester, 1, 5 ); Assertion.assertMatch( "" + (char) 0x0D + (char) 0x0A + (char) 0x09 + (char) 0x0D + (char) 0x0A + (char) 0x09 + "=/" + (char) 0x0D + (char) 0x0A + (char) 0x09 + (char) 0x0D + (char) 0x0A + (char) 0x09 + "=", tester, "=/", 2, 5); Assertion.assertMatch("" + (char)0x09 + (char)0x09 + "=", tester, "=", 4, 1); Assertion.assertMatch("" + (char)0x09 + (char)0x09 + "==/", tester, "=", 4, 1); Assertion.assertMatch("" + (char)0x09 + (char)0x09 + "=" + (char)0x09 + (char)0x09, tester, "=", 6, 1); Assertion.assertMatch("" + (char)0x09 + (char)0x09 + "=/", tester, "=/", 5, 1); Assertion.assertMatch("" + (char)0x09 + (char)0x09 + "=/=", tester, "=/", 5, 1); Assertion.assertMatch("" + (char) 0x09 + (char) 0x09 + "=/" + (char) 0x09 + (char) 0x09, tester, "=/", 7, 1); Assertion.assertMatch("" + (char) 0x09 + (char) 0x09 + "=/" + (char) 0x09 + (char) 0x09 + "=", tester, "=/", 7, 1); Assertion.assertMatch( "; ; ; \"" + (char) 0x0D + (char) 0x0A + " " + "=" + "; ; ; \"" + (char) 0x0D + (char) 0x0A + " ", tester, "=", 2, 3); Assertion.assertMatch( "; ; ; \"" + (char)0x0D + (char)0x0A + " " + "=" + "; ; ; \"" + (char)0x0D + (char)0x0A + " /", tester, "=", 2, 3); Assertion.assertMatch( "; ; ; \"" + (char) 0x0D + (char) 0x0A + " " + "=/" + "; ; ; \"" + (char) 0x0D + (char) 0x0A + " ", tester, "=/", 2, 3); Assertion.assertMatch( "; ; ; \"" + (char)0x0D + (char)0x0A + " " + "=/" + "; ; ; \"" + (char)0x0D + (char)0x0A + " =/", tester, "=/", 2, 3); } // elements = alternation *c-wsp @Test public void testElements() throws Exception { Tester<Elements> tester = new Tester<Elements>() { @Override public Elements test(AbnfParser parser) throws MatchException, IOException { return parser.elements(); } }; String input; input = "A/B/C"; Assertion.assertMatch(input, tester, AbnfParserFactory.newInstance(input).elements(), 6, 1); // TODO input = "A/B/C "; Assertion.assertMatchException(input, tester, 8, 1); }
单元测试里面体现出来的一些算法缺陷,我会写一篇帖子专门总结一下。
到这里,文法解析器就基本完成了,下一篇我会以SIP(RFC3261)协议为例子,生成SIP的ABNF文法表示。