简单词法分析器(Java 版本)

Error 为错误信息类,保存每个词法错误的行号、列号和错误说明,用于输出 error.txt。

Word 为关键字与运算符的常量定义类,Lexer 用其中的常量来识别保留字和运算符。

Lexer 为词法分析核心类,包含对源程序的分析函数,供外部调用

Error.java 错误提示类

/**
 * One lexical error: the position where it was detected and a message
 * describing it. The lexer creates these and reads them back through the
 * getters when it writes its error table.
 *
 * @author Administrator
 */
public class Error {
	/** Line number at which the error was detected. */
	private Integer lineNo;
	/** Column number at which the error was detected. */
	private Integer columnNo;
	/** Text describing the error. */
	private String message;

	/**
	 * Creates an error record.
	 *
	 * @param hh  line number
	 * @param lh  column number
	 * @param msg error message
	 */
	public Error(Integer hh, Integer lh, String msg) {
		this.lineNo = hh;
		this.columnNo = lh;
		this.message = msg;
	}

	/** @return the line number */
	public Integer getHh() {
		return this.lineNo;
	}

	/** @return the column number */
	public Integer getLh() {
		return this.columnNo;
	}

	/** @return the error message */
	public String getMsg() {
		return this.message;
	}

	/** @param hh the new line number */
	public void setHh(Integer hh) {
		this.lineNo = hh;
	}

	/** @param lh the new column number */
	public void setLh(Integer lh) {
		this.columnNo = lh;
	}

	/** @param msg the new error message */
	public void setMsg(String msg) {
		this.message = msg;
	}
}

Word.java 关键字类

/**
 * Lexeme constants for the operators and reserved words that the lexer
 * recognises. Every constant holds the exact source text of its lexeme.
 *
 * @author jiangliuhong
 * @createTime 2016-05-23 10:36:21
 */
public class Word {
    // Logical operators.
    public static final String and = "&&";
    public static final String or = "||";
    public static final String n = "!";

    // Assignment and comparison operators.
    public static final String e = "=";
    public static final String eq = "==";
    public static final String ne = "!=";
    public static final String l = "<";
    public static final String le = "<=";
    public static final String g = ">";
    public static final String ge = ">=";

    // Boolean literals.
    public static final String True = "true";
    public static final String False = "false";

    // Variable types.
    public static final String Int = "int";
    public static final String Double = "double";
    public static final String Float = "float";
    public static final String Char = "char";
    public static final String Boolean = "boolean";
    public static final String String = "String";

    // Access modifiers.
    public static final String Private = "private";
    public static final String Public = "public";
    public static final String Protected = "protected";
}

Lexer.java 词法分析核心类

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
public class Lexer {
	/* 记录行号 */
	public static int line = 1;
	/* 记录列号 */
	public static int rows = 0;
	/* 下一个读入字符 */
	char peek = ' ';
	Hashtable words = new Hashtable();
	/* erros 表 */
	private Hashtable errors = new Hashtable();
	/* token序列 */
	private List tokens = new LinkedList();
	/* 源地址 */
	private String SOURCE_PATH = "";
	/* 符号表地址 */
	private String SYMBOL_TABLE = "";
	/* Tokens表 */
	private String TOKEN_TABLE = "";
	/* 错误信息表 */
	private String ERROR_TABLE = "";
	/* 读取文件变量 */
	BufferedReader reader = null;
	/* 保存当前是否读取到了文件的结尾 */
	private Boolean isEnd = false;

	/* 是否读取到文件的结尾 */
	public Boolean getReaderState() {
		return this.isEnd;
	}
	/*
	 * 构造函数中将关键字和类型添加到hashtable words中
	 */
	public Lexer(String sourcepath, String symobolpath, String tokenpath, String errortable) {
		SOURCE_PATH = sourcepath;
		SYMBOL_TABLE = symobolpath;
		TOKEN_TABLE = tokenpath;
		ERROR_TABLE = errortable;
		/* 初始化读取文件变量 */
		try {
			reader = new BufferedReader(new FileReader(SOURCE_PATH));
		} catch (IOException e) {
			System.out.print(e);
		}
		/* 关键字 */
		reserve(Word.and, "运算符");
		reserve(Word.or, "运算符");
		reserve(Word.eq, "运算符");
		reserve(Word.ne, "运算符");
		reserve(Word.e, "运算符");
		reserve(Word.g, "运算符");
		reserve(Word.l, "运算符");
		reserve(Word.n, "运算符");
		reserve(Word.True, "关键字");
		reserve(Word.False, "关键字");
		reserve(Word.Int, "关键字");
		reserve(Word.Double, "关键字");
		reserve(Word.Float, "关键字");
		reserve(Word.Char, "关键字");
		reserve(Word.Boolean, "关键字");
		reserve(Word.String, "关键字");
		reserve(Word.Private, "关键字");
		reserve(Word.Protected, "关键字");
	}
	void reserve(String key, String vaule) {
		words.put(key, vaule);
	}
	/* 读文件字符 */
	public void readch() throws IOException {
		/* 这里应该是使用的是 */
		peek = (char) reader.read();
		if ((int) peek == 0xffff) {
			this.isEnd = true;
		}
		// peek = (char)System.in.read();
	}
	/* 读文件下一个字符 */
	public Boolean readch(char ch) throws IOException {
		readch();
		if (this.peek != ch) {
			return false;
		}
		this.peek = ' ';
		return true;
	}
	/* 程序词法分析 */
	public String scan() throws IOException {
		/* 消除空白 */
		for (;; readch()) {
			if (peek == ' ' || peek == '\t') {
				rows++;
				continue;
			} else if (peek == '\n') {
				line = line + 1;
				rows = 0;
			} else
				break;
		}
		rows++;
		/* 下面开始分割关键字,标识符等信息 */
		switch (peek) {
		/* 对于 ==, >=, <=, !=的区分使用状态机实现 */
		case '=':
			if (readch('=')) {
				tokens.add("==");
				return Word.eq;
			} else {
				tokens.add("=");
				return Word.e;
			}
		case '>':
			if (readch('=')) {
				tokens.add(">=");
				return Word.ge;
			} else {
				tokens.add(">");
				return Word.g;
			}
		case '<':
			if (readch('=')) {
				tokens.add("<=");
				return Word.le;
			} else {
				tokens.add("<");
				return Word.l;
			}
		case '!':
			if (readch('=')) {
				tokens.add("!=");
				return Word.ne;
			} else {

				tokens.add("!");
				return Word.n;
			}
		case '~':
		case '@':
		case '$':
		case '%':
		case '^':
		case '&':
			/*
			 * if(peek == ' ' || peek == '\t'){ tokens.add(peek+"");
			 * errors.put(peek+"", new Error(line,rows,"单独的符号错误")); return
			 * peek+""; }
			 */
			String vs = "";
			do {
				vs += peek;
				readch();
				if(peek == ' ' || peek == '\t'){
					break;
				}
			} while (true);
			errors.put(vs, new Error(line, rows, "符号命名错误"));
			return vs;
		}
		/*
		 * 下面是对数字的识别,根据文法的规定的话,这里的 数字只要是能够识别整数就行.
		 */
		if (Character.isDigit(peek)) {
			int value = 0;
			String vs = "";
			do {
				value = 10 * value + Character.digit(peek, 10);
				vs += value;
				readch();
			} while (Character.isDigit(peek));
			if ((peek >= 'a' && peek <= 'z') || (peek >= 'A' && peek <= 'Z')) {
				vs = vs + peek;
				checkTag(vs);
				tokens.add(vs);
				readch();
				return vs;
			} else {
				tokens.add(value + "");
				words.put(value + "", "数字");
				return value + "";
			}
		}
		/*
		 * 关键字或者是标识符的识别
		 */
		if (Character.isLetter(peek)) {
			StringBuffer sb = new StringBuffer();

			/* 首先得到整个的一个分割 */
			do {
				sb.append(peek);
				readch();
			} while (Character.isLetterOrDigit(peek));
			/* 判断是关键字还是标识符 */
			String s = sb.toString();
			String t = words.get(s);
			/* t 为关键字 */
			/* 如果是关键字或者是类型的话,w不应该是空的 */
			if (t != null) {
				tokens.add(s);
				return s; /* 说明是关键字 或者是类型名 */
			}
			/* 否则就是一个标识符id */
			if (checkTag(s)) {
				tokens.add(s);
				words.put(s, "标识符");
				return s;
			} else {
				return s;
			}
		}
		/* peek中的任意字符都被认为是词法单元返回 */
		String ss = "" + (char) peek;
		// table.put(tok, "Token or Seprator");
		if ((int) peek != 0xffff)
			tokens.add(ss);
		peek = ' ';
		return ss;
	}
	/* 标识符规则 */
	public boolean checkTag(String str) {
		char beg = str.charAt(0);
		/* 不能以符号开头 */
		switch (beg) {
		/* 不能以数字开头 */
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
			errors.put(str, new Error(line, rows, "不能用数字开头"));
			return false;
		case '~':
		case '!':
		case '@':
		case '$':
		case '%':
		case '^':
		case '&':
		case '*':
		case '(':
		case ')':
			errors.put(str, new Error(line, rows, "不能用符号开头"));
			return false;
		default:
			return true;
		}
	}
	/* 保存error */
	public void saveErrorTable() throws IOException {
		FileWriter writer = new FileWriter(ERROR_TABLE);
		writer.write("[符号]          [错误类型]          [错误位置]\n");
		writer.write("\r\n");
		Enumeration e = errors.keys();
		while (e.hasMoreElements()) {
			String key = e.nextElement();
			Error err = errors.get(key);
			writer.write(key + "\t\t\t" + err.getMsg() + "\t\t\t" + err.getHh() + "行" + err.getLh() + "列" + "\r\n");
		}
		writer.flush();
	}
	/* 保存Tokens */
	public void saveTokens() throws IOException {
		FileWriter writer = new FileWriter(TOKEN_TABLE);
		writer.write("[符号]  \n");
		writer.write("\r\n");

		for (int i = 0; i < tokens.size(); ++i) {
			String tok = tokens.get(i);
			/* 写入文件 */
			writer.write(tok + "\r\n");
		}
		writer.flush();
	}
	/* 保存存储在table中的 */
	public void saveSymbolsTable() throws IOException {
		FileWriter writer = new FileWriter(SYMBOL_TABLE);
		writer.write("[符号]          [符号类型信息]\n");
		writer.write("\r\n");
		for (int i = 0; i < tokens.size(); ++i) {
			String tok = tokens.get(i);
			String desc = words.get(tok);
			if (desc != null) {
				/* 写入文件 */
				writer.write(tok + "\t\t\t" + desc + "\r\n");
			}
		}
		writer.flush();
	}
}

Main.java 测试程序

public class Main {
	public static void main(String[] args) throws Exception {
		 Lexer lexer = new Lexer("src/yuan.txt","src/symbol.txt","src/token.txt","src/error.txt");  
	        while (lexer.getReaderState() == false) {  
	            lexer.scan();
	        }  
	        /* 保存相关信息 */  
	        lexer.saveTokens();  
	        lexer.saveSymbolsTable();  
	        lexer.saveErrorTable();
	        System.out.println("ok");
	}
}
输入文件 yuan.txt

public class Text{
	private String @aa = "";
	private int a = 111;
	private int b = 2;
	private int c = 3;
	public static void main(String[] args){
		double 0a = 1;
		int s = 0 ;
		s = a + b + c;
		if(s == 23)
		System.out.println(s);
	}
}
输出结果

symbol.txt 输出的符号表

[符号]          [符号类型信息]
public			标识符
class			标识符
Text			标识符
private			关键字
String			关键字
=			运算符
private			关键字
int			关键字
a			标识符
=			运算符
111			数字
private			关键字
int			关键字
b			标识符
=			运算符
2			数字
private			关键字
int			关键字
c			标识符
=			运算符
3			数字
public			标识符
static			标识符
void			标识符
main			标识符
String			关键字
args			标识符
double			关键字
=			运算符
1			数字
int			关键字
s			标识符
=			运算符
0			数字
s			标识符
=			运算符
a			标识符
b			标识符
c			标识符
if			标识符
s			标识符
==			运算符
23			数字
System			标识符
out			标识符
println			标识符
s			标识符

error.txt 输出的错误信息表

[符号]          [错误类型]          [错误位置]
@aa			符号命名错误			2行6列
0a			不能用数字开头			7行5列







你可能感兴趣的:(java)