Error 为错误信息类,保存一条错误的行号、列号和描述,用于输出 error.txt。
Word 为关键字定义类,以字符串常量保存运算符和关键字,用于对单词的识别。
Lexer 为词法分析核心类,包含对源程序的分析函数,供外部调用。
Error.java 错误提示类
/**
 * One lexical error: the position it was reported at and a description.
 *
 * <p>Note that this class has the same simple name as {@code java.lang.Error};
 * inside this package the simple name {@code Error} refers to this class.
 *
 * @author Administrator
 */
public class Error {

    /** Line number the error was reported at. */
    private Integer hh;

    /** Column number the error was reported at. */
    private Integer lh;

    /** Human-readable description of the error. */
    private String msg;

    /**
     * Creates an error record.
     *
     * @param hh  line number
     * @param lh  column number
     * @param msg error description
     */
    public Error(Integer hh, Integer lh, String msg) {
        this.hh = hh;
        this.lh = lh;
        this.msg = msg;
    }

    // ---- accessors -------------------------------------------------------

    /** @return the line number */
    public Integer getHh() {
        return this.hh;
    }

    /** @return the column number */
    public Integer getLh() {
        return this.lh;
    }

    /** @return the error description */
    public String getMsg() {
        return this.msg;
    }

    // ---- mutators --------------------------------------------------------

    /** @param hh the new line number */
    public void setHh(Integer hh) {
        this.hh = hh;
    }

    /** @param lh the new column number */
    public void setLh(Integer lh) {
        this.lh = lh;
    }

    /** @param msg the new error description */
    public void setMsg(String msg) {
        this.msg = msg;
    }
}
/**
 * String constants for the operators and keywords the lexer knows about.
 *
 * @author jiangliuhong
 * @createTime 2016-05-23 10:36:21
 */
public class Word {

    // Logical operators.
    public static final String and = "&&";
    public static final String or = "||";
    public static final String n = "!";

    // Assignment and comparison operators.
    public static final String e = "=";
    public static final String eq = "==";
    public static final String ne = "!=";
    public static final String l = "<";
    public static final String le = "<=";
    public static final String g = ">";
    public static final String ge = ">=";

    // Boolean literals.
    public static final String True = "true";
    public static final String False = "false";

    // Variable types.
    public static final String Int = "int";
    public static final String Double = "double";
    public static final String Float = "float";
    public static final String Char = "char";
    public static final String Boolean = "boolean";
    public static final String String = "String";

    // Access modifiers.
    public static final String Private = "private";
    public static final String Public = "public";
    public static final String Protected = "protected";
}
Lexer.java 词法分析核心类
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
public class Lexer {
/* 记录行号 */
public static int line = 1;
/* 记录列号 */
public static int rows = 0;
/* 下一个读入字符 */
char peek = ' ';
Hashtable words = new Hashtable();
/* erros 表 */
private Hashtable errors = new Hashtable();
/* token序列 */
private List tokens = new LinkedList();
/* 源地址 */
private String SOURCE_PATH = "";
/* 符号表地址 */
private String SYMBOL_TABLE = "";
/* Tokens表 */
private String TOKEN_TABLE = "";
/* 错误信息表 */
private String ERROR_TABLE = "";
/* 读取文件变量 */
BufferedReader reader = null;
/* 保存当前是否读取到了文件的结尾 */
private Boolean isEnd = false;
/* 是否读取到文件的结尾 */
public Boolean getReaderState() {
return this.isEnd;
}
/*
* 构造函数中将关键字和类型添加到hashtable words中
*/
public Lexer(String sourcepath, String symobolpath, String tokenpath, String errortable) {
SOURCE_PATH = sourcepath;
SYMBOL_TABLE = symobolpath;
TOKEN_TABLE = tokenpath;
ERROR_TABLE = errortable;
/* 初始化读取文件变量 */
try {
reader = new BufferedReader(new FileReader(SOURCE_PATH));
} catch (IOException e) {
System.out.print(e);
}
/* 关键字 */
reserve(Word.and, "运算符");
reserve(Word.or, "运算符");
reserve(Word.eq, "运算符");
reserve(Word.ne, "运算符");
reserve(Word.e, "运算符");
reserve(Word.g, "运算符");
reserve(Word.l, "运算符");
reserve(Word.n, "运算符");
reserve(Word.True, "关键字");
reserve(Word.False, "关键字");
reserve(Word.Int, "关键字");
reserve(Word.Double, "关键字");
reserve(Word.Float, "关键字");
reserve(Word.Char, "关键字");
reserve(Word.Boolean, "关键字");
reserve(Word.String, "关键字");
reserve(Word.Private, "关键字");
reserve(Word.Protected, "关键字");
}
void reserve(String key, String vaule) {
words.put(key, vaule);
}
/* 读文件字符 */
public void readch() throws IOException {
/* 这里应该是使用的是 */
peek = (char) reader.read();
if ((int) peek == 0xffff) {
this.isEnd = true;
}
// peek = (char)System.in.read();
}
/* 读文件下一个字符 */
public Boolean readch(char ch) throws IOException {
readch();
if (this.peek != ch) {
return false;
}
this.peek = ' ';
return true;
}
/* 程序词法分析 */
public String scan() throws IOException {
/* 消除空白 */
for (;; readch()) {
if (peek == ' ' || peek == '\t') {
rows++;
continue;
} else if (peek == '\n') {
line = line + 1;
rows = 0;
} else
break;
}
rows++;
/* 下面开始分割关键字,标识符等信息 */
switch (peek) {
/* 对于 ==, >=, <=, !=的区分使用状态机实现 */
case '=':
if (readch('=')) {
tokens.add("==");
return Word.eq;
} else {
tokens.add("=");
return Word.e;
}
case '>':
if (readch('=')) {
tokens.add(">=");
return Word.ge;
} else {
tokens.add(">");
return Word.g;
}
case '<':
if (readch('=')) {
tokens.add("<=");
return Word.le;
} else {
tokens.add("<");
return Word.l;
}
case '!':
if (readch('=')) {
tokens.add("!=");
return Word.ne;
} else {
tokens.add("!");
return Word.n;
}
case '~':
case '@':
case '$':
case '%':
case '^':
case '&':
/*
* if(peek == ' ' || peek == '\t'){ tokens.add(peek+"");
* errors.put(peek+"", new Error(line,rows,"单独的符号错误")); return
* peek+""; }
*/
String vs = "";
do {
vs += peek;
readch();
if(peek == ' ' || peek == '\t'){
break;
}
} while (true);
errors.put(vs, new Error(line, rows, "符号命名错误"));
return vs;
}
/*
* 下面是对数字的识别,根据文法的规定的话,这里的 数字只要是能够识别整数就行.
*/
if (Character.isDigit(peek)) {
int value = 0;
String vs = "";
do {
value = 10 * value + Character.digit(peek, 10);
vs += value;
readch();
} while (Character.isDigit(peek));
if ((peek >= 'a' && peek <= 'z') || (peek >= 'A' && peek <= 'Z')) {
vs = vs + peek;
checkTag(vs);
tokens.add(vs);
readch();
return vs;
} else {
tokens.add(value + "");
words.put(value + "", "数字");
return value + "";
}
}
/*
* 关键字或者是标识符的识别
*/
if (Character.isLetter(peek)) {
StringBuffer sb = new StringBuffer();
/* 首先得到整个的一个分割 */
do {
sb.append(peek);
readch();
} while (Character.isLetterOrDigit(peek));
/* 判断是关键字还是标识符 */
String s = sb.toString();
String t = words.get(s);
/* t 为关键字 */
/* 如果是关键字或者是类型的话,w不应该是空的 */
if (t != null) {
tokens.add(s);
return s; /* 说明是关键字 或者是类型名 */
}
/* 否则就是一个标识符id */
if (checkTag(s)) {
tokens.add(s);
words.put(s, "标识符");
return s;
} else {
return s;
}
}
/* peek中的任意字符都被认为是词法单元返回 */
String ss = "" + (char) peek;
// table.put(tok, "Token or Seprator");
if ((int) peek != 0xffff)
tokens.add(ss);
peek = ' ';
return ss;
}
/* 标识符规则 */
public boolean checkTag(String str) {
char beg = str.charAt(0);
/* 不能以符号开头 */
switch (beg) {
/* 不能以数字开头 */
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
errors.put(str, new Error(line, rows, "不能用数字开头"));
return false;
case '~':
case '!':
case '@':
case '$':
case '%':
case '^':
case '&':
case '*':
case '(':
case ')':
errors.put(str, new Error(line, rows, "不能用符号开头"));
return false;
default:
return true;
}
}
/* 保存error */
public void saveErrorTable() throws IOException {
FileWriter writer = new FileWriter(ERROR_TABLE);
writer.write("[符号] [错误类型] [错误位置]\n");
writer.write("\r\n");
Enumeration e = errors.keys();
while (e.hasMoreElements()) {
String key = e.nextElement();
Error err = errors.get(key);
writer.write(key + "\t\t\t" + err.getMsg() + "\t\t\t" + err.getHh() + "行" + err.getLh() + "列" + "\r\n");
}
writer.flush();
}
/* 保存Tokens */
public void saveTokens() throws IOException {
FileWriter writer = new FileWriter(TOKEN_TABLE);
writer.write("[符号] \n");
writer.write("\r\n");
for (int i = 0; i < tokens.size(); ++i) {
String tok = tokens.get(i);
/* 写入文件 */
writer.write(tok + "\r\n");
}
writer.flush();
}
/* 保存存储在table中的 */
public void saveSymbolsTable() throws IOException {
FileWriter writer = new FileWriter(SYMBOL_TABLE);
writer.write("[符号] [符号类型信息]\n");
writer.write("\r\n");
for (int i = 0; i < tokens.size(); ++i) {
String tok = tokens.get(i);
String desc = words.get(tok);
if (desc != null) {
/* 写入文件 */
writer.write(tok + "\t\t\t" + desc + "\r\n");
}
}
writer.flush();
}
}
Main.java 测试程序
public class Main {
public static void main(String[] args) throws Exception {
Lexer lexer = new Lexer("src/yuan.txt","src/symbol.txt","src/token.txt","src/error.txt");
while (lexer.getReaderState() == false) {
lexer.scan();
}
/* 保存相关信息 */
lexer.saveTokens();
lexer.saveSymbolsTable();
lexer.saveErrorTable();
System.out.println("ok");
}
}
输入的yuan.txt
public class Text{
private String @aa = "";
private int a = 111;
private int b = 2;
private int c = 3;
public static void main(String[] args){
double 0a = 1;
int s = 0 ;
s = a + b + c;
if(s == 23)
System.out.println(s);
}
}
输出结果
symbol.txt 输出的符号表
[符号] [符号类型信息]
public 标识符
class 标识符
Text 标识符
private 关键字
String 关键字
= 运算符
private 关键字
int 关键字
a 标识符
= 运算符
111 数字
private 关键字
int 关键字
b 标识符
= 运算符
2 数字
private 关键字
int 关键字
c 标识符
= 运算符
3 数字
public 标识符
static 标识符
void 标识符
main 标识符
String 关键字
args 标识符
double 关键字
= 运算符
1 数字
int 关键字
s 标识符
= 运算符
0 数字
s 标识符
= 运算符
a 标识符
b 标识符
c 标识符
if 标识符
s 标识符
== 运算符
23 数字
System 标识符
out 标识符
println 标识符
s 标识符
error.txt 输出的错误信息表
[符号] [错误类型] [错误位置]
@aa 符号命名错误 2行6列
0a 不能用数字开头 7行5列