【读书笔记】-【编程语言的实现模式】-【LL(1)递归下降的词法解析器】

形如:[a,b,c] [a,[b,cd],f] 为 嵌套列表

其ANTLR文法表示:


list :'[' elements ']'; // 匹配方括号
elements : element (',' element)*; // 匹配以逗号分隔的元素
element : NAME | list; // element是NAME或者嵌套的list
NAME : ('a'..'z' | 'A'..'Z')+; // NAME含有至少一个字母

具体实现:

/**
 * A single lexical token: a numeric token type paired with the matched text.
 */
public class Token {
    /** Token type; used as an index into {@code ListLexer.tokenNames} when printing. */
    public int type;
    /** The text carried by this token. */
    public String text;

    /**
     * Creates a token.
     *
     * @param type the token type
     * @param text the text of the token
     */
    public Token(int type, String text) {
        this.type = type;
        this.text = text;
    }

    /**
     * Renders the token as {@code <'text',TYPE_NAME>}.
     *
     * @return the printable form of this token
     */
    @Override
    public String toString() {
        // Look up the human-readable name of the token type.
        final String typeName = ListLexer.tokenNames[type];
        StringBuilder rendered = new StringBuilder();
        rendered.append("<'").append(text).append("',").append(typeName).append(">");
        return rendered.toString();
    }
}
public class ListLexer extends Lexer{
    public static int NAME = 2;
    public static int COMMA = 3;
    public static int LBRACK = 4;
    public static int RBRACK = 5;
    public static String[] tokenNames = {"n/a","<EOF>","NAME","COMMA","LBRACK","RBRACK"};
    public String getTokenName(int x) {
        return tokenNames[x];
    }
    boolean isLETTER() {
        return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z';
    }


    public ListLexer(String input) {
        super(input);
    }

    public Token nextToken() {
        while (c != EOF) {
            switch (c) {
                case ' ':case '\t':case '\n':case '\r':WS(); continue;
                case ',':consume();return new Token(COMMA,",");
                case '[':consume();return new Token(LBRACK,"[");
                case ']':consume();return new Token(RBRACK,"]");
                default:
                    if (isLETTER()) {
                        return NAME();
                    } else {
                        throw new Error("invalid character:"+c);
                    }
            }

        }
        return new Token(EOF_TYPE,"<EOF>");
    }

    Token NAME() {
        // NAME由一个或者多个字母组成
        StringBuilder buf = new StringBuilder();
        do {
            buf.append(c);
            consume();
        } while(isLETTER());
        return new Token(NAME, buf.toString());

    }

    void WS() {
        // 忽略所有空白符
        while (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
            consume();
        }
    }
}
/**
 * Base class for character-at-a-time lexers. It holds the input string, the current
 * position and a one-character lookahead, and leaves token recognition to subclasses.
 */
public abstract class Lexer {
    /** Sentinel stored in {@code c} when the input is exhausted. */
    public static final char EOF = (char)-1;
    /** Token type that represents end of input. */
    public static final int EOF_TYPE = 1;
    String input; // the string being tokenized
    int p = 0; // index of the current character in input
    char c; // current (lookahead) character

    /**
     * Creates a lexer and primes the lookahead with the first character of the input.
     *
     * @param input the text to tokenize; an empty string starts at end of input
     */
    public Lexer(String input) {
        this.input = input;
        // An empty input has no first character; start at EOF rather than letting
        // charAt(0) throw StringIndexOutOfBoundsException.
        c = p < input.length() ? input.charAt(p) : EOF;
    }


    /**
     * Advances to the next input character, setting {@code c} to {@link #EOF}
     * once the position moves past the last character.
     */
    public void consume() {
        p++;
        if (p >= input.length()) {
            c = EOF;
        } else {
            c = input.charAt(p);
        }
    }

    /**
     * Consumes the current character if it equals {@code x}.
     *
     * @param x the character expected at the current position
     * @throws Error if the current character is not {@code x}
     */
    public void match(char x) {
        if (c == x) {
            consume();
        } else {
            throw new Error("expecting "+ x + "; found"+c);
        }
    }

    /**
     * Scans and returns the next token from the input.
     *
     * @return the next token
     */
    public abstract Token nextToken();

    /**
     * Returns the printable name of a token type.
     *
     * @param tokenType the token type
     * @return the name of that token type
     */
    public abstract String getTokenName(int tokenType);
}
/**
 * Small driver that tokenizes a sample string and prints every token,
 * including the final end-of-input token.
 */
public class Test {
    public static void main(String[] args) {
        ListLexer lexer = new ListLexer("[abc,]a,b]]");
        Token current;
        // Print each token as it is produced; stop right after the EOF token is printed.
        do {
            current = lexer.nextToken();
            System.out.println(current);
        } while (current.type != Lexer.EOF_TYPE);
    }

}

输出结果:
<'[',LBRACK>
<'abc',NAME>
<',',COMMA>
<']',RBRACK>
<'a',NAME>
<',',COMMA>
<'b',NAME>
<']',RBRACK>
<']',RBRACK>
<'<EOF>',<EOF>>

你可能感兴趣的:(编程语言)