形如:[a,b,c] [a,[b,cd],f] 为 嵌套列表
其ANTLR文法表示:
list :'[' elements ']'; // 匹配方括号
elements : element (',' element)*; // 匹配以逗号分隔的元素
element : NAME | list; // element是NAME或者嵌套的list
NAME : ('a'..'z' | 'A'..'Z')+; // NAME含有至少一个字母

具体实现:
/**
 * A single lexical token: a numeric token type plus the matched text.
 */
public class Token {
    /** Token type; used as an index into {@code ListLexer.tokenNames}. */
    public int type;
    /** The text carried by this token. */
    public String text;

    /**
     * Creates a token.
     *
     * @param type token type
     * @param text text of the token
     */
    public Token(int type, String text) {
        this.type = type;
        this.text = text;
    }

    /**
     * Renders the token as {@code <'text',TYPE_NAME>}, looking the type name
     * up in {@code ListLexer.tokenNames}.
     */
    @Override
    public String toString() {
        String typeName = ListLexer.tokenNames[type];
        return String.format("<'%s',%s>", text, typeName);
    }
}
public class ListLexer extends Lexer{ public static int NAME = 2; public static int COMMA = 3; public static int LBRACK = 4; public static int RBRACK = 5; public static String[] tokenNames = {"n/a","<EOF>","NAME","COMMA","LBRACK","RBRACK"}; public String getTokenName(int x) { return tokenNames[x]; } boolean isLETTER() { return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z'; } public ListLexer(String input) { super(input); } public Token nextToken() { while (c != EOF) { switch (c) { case ' ':case '\t':case '\n':case '\r':WS(); continue; case ',':consume();return new Token(COMMA,","); case '[':consume();return new Token(LBRACK,"["); case ']':consume();return new Token(RBRACK,"]"); default: if (isLETTER()) { return NAME(); } else { throw new Error("invalid character:"+c); } } } return new Token(EOF_TYPE,"<EOF>"); } Token NAME() { // NAME由一个或者多个字母组成 StringBuilder buf = new StringBuilder(); do { buf.append(c); consume(); } while(isLETTER()); return new Token(NAME, buf.toString()); } void WS() { // 忽略所有空白符 while (c == ' ' || c == '\t' || c == '\n' || c == '\r') { consume(); } } }
public abstract class Lexer { public static final char EOF = (char)-1; public static final int EOF_TYPE = 1; // 表示EOF的词法类型 String input; // 待解析的字符串 int p = 0; // 当前输入字符串的下标 char c; // 当前字符 public Lexer(String input) { this.input = input; c = input.charAt(p); } public void consume() { // 向前移动一个字符,检验输入是否结束 p++; if (p >= input.length()) { c = EOF; } else { c = input.charAt(p); } } public void match(char x) { if (c == x) { consume(); } else { throw new Error("expecting "+ x + "; found"+c); } } public abstract Token nextToken(); public abstract String getTokenName(int tokenType); }
/**
 * Demo driver: tokenizes a sample string and prints each token, ending with
 * the EOF token.
 */
public class Test {
    public static void main(String[] args) {
        ListLexer lexer = new ListLexer("[abc,]a,b]]");
        Token current;
        // Print every token as it is read; the EOF token is printed last and ends the loop.
        do {
            current = lexer.nextToken();
            System.out.println(current);
        } while (current.type != Lexer.EOF_TYPE);
    }
}
输出结果:
<'[',LBRACK>
<'abc',NAME>
<',',COMMA>
<']',RBRACK>
<'a',NAME>
<',',COMMA>
<'b',NAME>
<']',RBRACK>
<']',RBRACK>
<'<EOF>',<EOF>>