一个标识符对应一个机内码
相同的标识符始终对应同一个机内码
存储时:hashCode相同的键会找到相同的bucket位置,发生碰撞;因为HashMap使用链表存储对象(每个Map.Entry都有一个next指针),新的Entry会存储在该bucket的链表中。
获取时:会用hashCode找到bucket位置,然后调用key.equals()方法找到链表中正确的节点.最终找到要找的值对象.
减少碰撞:使用final修饰的对象、或不可变的对象作为键,例如Integer、String
动态扩容: 理论上,只要内存足够,是完全可以放下所有程序里需要的标识符的
产生hash碰撞的元素会放到相同hash值里的链表下
查询效率:O(1),插入效率O(1)
String的hashCode就是以31为权,对每一位字符的编码值(char值,对ASCII字符即其ASCII值)进行运算,用int的自然溢出来等效取模。
哈希计算公式可以记为s[0]*31^(n-1) + s[1]*31^(n-2) + … + s[n-1]
产生一个标识符->用这个标识符的hashcode去到hashmap里去查是否存在
->若存在,则得到这个标识符的机内码
->若不存在,则生成一个这个标识符的机内码并放入hashmap
本文表示形式:var#id
/**
 * A small hand-written lexer ("word analyzer") for a C-like language.
 *
 * <p>For every token found in the input it writes one line to the output:
 * <ul>
 *   <li>keyword / operator / delimiter: {@code <text> <code>}</li>
 *   <li>unsigned numeric constant: {@code <text> 51 <binary>}</li>
 *   <li>identifier: {@code <text> 52 var#<n>}, where {@code n} is assigned on first
 *       occurrence and reused for every later occurrence of the same identifier</li>
 * </ul>
 * Whitespace and characters that are not registered in the keyword table are skipped.
 *
 * <p>An instance is single-use: {@link #analyze(char[])} closes the output when it finishes.
 */
public class WordAnalyze {
    /** Maps every keyword, operator and delimiter to its internal code (1..50). */
    private static final HashMap<String, Integer> keywords = new HashMap<>();

    static {
        // Assign consecutive internal codes to the keywords, in registration order.
        int i = 0;
        keywords.put("int", ++i);
        keywords.put("float", ++i);
        keywords.put("char", ++i);
        keywords.put("if", ++i);
        keywords.put("else", ++i);
        keywords.put("for", ++i);
        keywords.put("while", ++i);
        keywords.put("return", ++i);
        keywords.put("break", ++i);
        keywords.put("continue", ++i);
        keywords.put("switch", ++i);
        keywords.put("case", ++i);
        keywords.put("default", ++i);
        keywords.put("double", ++i);
        keywords.put("void", ++i);
        keywords.put("struct", ++i);
        keywords.put("static", ++i);
        keywords.put("do", ++i);
        keywords.put("short", ++i);
        keywords.put("+", ++i);
        keywords.put("*", ++i);
        keywords.put("/", ++i);
        keywords.put("%", ++i);
        keywords.put("=", ++i);
        keywords.put(">", ++i);
        keywords.put("<", ++i);
        keywords.put("!", ++i);
        keywords.put("==", ++i);
        keywords.put("!=", ++i);
        keywords.put(">=", ++i);
        keywords.put("<=", ++i);
        keywords.put("++", ++i);
        keywords.put("--", ++i);
        keywords.put("&", ++i);
        keywords.put("&&", ++i);
        keywords.put("||", ++i);
        keywords.put("[", ++i);
        keywords.put("]", ++i);
        keywords.put(",", ++i);
        keywords.put(";", ++i);
        keywords.put("(", ++i);
        keywords.put(")", ++i);
        keywords.put("{", ++i);
        keywords.put("}", ++i);
        keywords.put("\"", ++i);
        // This slot used to hold an empty-string key, which no token can ever match
        // (apparently a placeholder for a quote character -- TODO confirm). It is reused
        // for "-", which the lexer emits but which was never registered, so "-" was
        // written as "- null". Reusing the slot keeps every other code and the total
        // (50) unchanged, so categories 51 and 52 below stay free.
        keywords.put("-", ++i);
        keywords.put(":", ++i);
        keywords.put("#", ++i);
        keywords.put(">>", ++i);
        keywords.put("<<", ++i);
        // NOTE(review): the third message says "keyword category 52", but analyze()
        // writes 52 for identifiers; the text is left untouched here.
        System.out.println("一共" + i + " 个关键字");
        System.out.println("常数类别标识51, 机内码为二进制表示");
        System.out.println("关键字类别标识52, 机内码暂定 ");
        System.out.println("==========================================");
    }

    /** Destination of the token listing; closed by {@link #analyze(char[])}. */
    private final java.io.Writer os;
    /** Identifier text -> internal code ("var#n") assigned on first occurrence. */
    private final HashMap<String, String> identifier = new HashMap<>();
    /** Number of distinct identifiers seen so far. */
    private int identifierCount = 0;

    /**
     * Creates an analyzer that appends its output to the file {@code 目标文件} in the
     * working directory (the file is created if it does not exist).
     *
     * @throws IllegalStateException if the output file cannot be opened; previously the
     *         failure was only printed and the first write then died with a
     *         NullPointerException
     */
    public WordAnalyze() {
        this(openDefaultOutput());
    }

    /**
     * Creates an analyzer that writes its output to the given writer.
     *
     * @param out destination of the token listing; closed when analysis finishes
     * @throws NullPointerException if {@code out} is null
     */
    public WordAnalyze(java.io.Writer out) {
        if (out == null) {
            throw new NullPointerException("out");
        }
        this.os = out;
    }

    /** Opens the default output file in append mode. */
    private static java.io.Writer openDefaultOutput() {
        try {
            // FileWriter creates the file when it is missing, so no exists() check is needed.
            return new FileWriter(new File("目标文件"), true);
        } catch (IOException e) {
            throw new IllegalStateException("cannot open output file for appending", e);
        }
    }

    /**
     * Reads the file {@code 源文件} from the working directory and runs the analyzer on it.
     *
     * @param args ignored
     * @throws Exception if the source file cannot be read
     */
    public static void main(String[] args) throws Exception {
        File file = new File("源文件");
        // The byte length is an upper bound for the number of decoded characters.
        char[] buf = new char[(int) file.length()];
        int filled = 0;
        try (FileReader reader = new FileReader(file)) {
            // A single read() may return fewer characters than requested, so loop to EOF.
            while (filled < buf.length) {
                int n = reader.read(buf, filled, buf.length - filled);
                if (n < 0) {
                    break;
                }
                filled += n;
            }
        }
        new WordAnalyze().analyze(java.util.Arrays.copyOf(buf, filled));
    }

    /** Returns whether {@code str} is a registered keyword, operator or delimiter. */
    boolean isKey(String str) {
        return keywords.containsKey(str);
    }

    /** Returns whether {@code letter} is an ASCII letter (a-z or A-Z). */
    boolean isLetter(char letter) {
        return (letter >= 'a' && letter <= 'z') || (letter >= 'A' && letter <= 'Z');
    }

    /** Returns whether {@code digit} is an ASCII digit (0-9). */
    boolean isDigit(char digit) {
        return digit >= '0' && digit <= '9';
    }

    /**
     * Performs the lexical analysis of {@code chars} and writes one line per token.
     * The output is closed afterwards, even if the analysis fails.
     *
     * @param chars the source text; no trailing padding character is required
     */
    void analyze(char[] chars) {
        try {
            int i = 0;
            while (i < chars.length) {
                char c = chars[i];
                if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
                    i++;
                } else if (isLetter(c)) {
                    i = scanWord(chars, i);
                } else if (isDigit(c)
                        || (c == '.' && i + 1 < chars.length && isDigit(chars[i + 1]))) {
                    i = scanNumber(chars, i);
                } else {
                    i = scanOperator(chars, i);
                }
            }
        } finally {
            // Close the stream once the analysis is over.
            try {
                os.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Scans a keyword or identifier starting at {@code start} and writes its line.
     *
     * @return index of the first character after the word
     */
    private int scanWord(char[] chars, int start) {
        int end = start + 1;
        while (end < chars.length && (isLetter(chars[end]) || isDigit(chars[end]))) {
            end++;
        }
        String word = new String(chars, start, end - start);
        if (isKey(word)) {
            writefile(word + " " + keywords.get(word) + "\n");
        } else {
            String code = identifier.get(word);
            if (code == null) {
                // First occurrence: generate and remember a new internal code.
                identifierCount++;
                code = "var#" + identifierCount;
                identifier.put(word, code);
            }
            writefile(word + " 52 " + code + "\n");
        }
        return end;
    }

    /**
     * Scans an unsigned constant ({@code digits}, {@code digits.digits} or {@code .digits})
     * starting at {@code start} and writes its line. Integers are written in plain binary
     * of any length; decimals are written as the bits of their IEEE-754 double value.
     *
     * @return index of the first character after the constant
     */
    private int scanNumber(char[] chars, int start) {
        int end = start;
        while (end < chars.length && isDigit(chars[end])) {
            end++;
        }
        boolean fractional = false;
        // A '.' belongs to the constant only when a digit follows it.
        if (end + 1 < chars.length && chars[end] == '.' && isDigit(chars[end + 1])) {
            fractional = true;
            end += 2;
            while (end < chars.length && isDigit(chars[end])) {
                end++;
            }
        }
        String literal = new String(chars, start, end - start);
        String binary;
        if (fractional) {
            binary = Long.toBinaryString(Double.doubleToLongBits(Double.parseDouble(literal)));
        } else {
            // BigInteger avoids the NumberFormatException parseInt raised on large literals.
            binary = new java.math.BigInteger(literal).toString(2);
        }
        writefile(literal + " 51 " + binary + "\n");
        return end;
    }

    /**
     * Scans an operator or delimiter at {@code i}. For the lead characters listed in the
     * switch, a registered two-character operator (for example {@code >=}, {@code !=},
     * {@code &&}) is preferred over the single character. Characters that are not
     * registered are skipped silently.
     *
     * @return index of the first character after the consumed text
     */
    private int scanOperator(char[] chars, int i) {
        char c = chars[i];
        switch (c) {
            // Lead characters of two-character operators; extend this list (and the
            // keyword table) to support another two-character operator.
            case '+':
            case '-':
            case '=':
            case '>':
            case '<':
            case '!':
            case '&':
            case '|':
                if (i + 1 < chars.length) {
                    String pair = new String(chars, i, 2);
                    if (isKey(pair)) {
                        writefile(pair + " " + keywords.get(pair) + "\n");
                        return i + 2;
                    }
                }
                break;
            default:
                break;
        }
        String single = String.valueOf(c);
        if (isKey(single)) {
            writefile(single + " " + keywords.get(single) + "\n");
        }
        return i + 1;
    }

    /**
     * Writes {@code content} to the output. An I/O failure is reported on stderr and
     * otherwise ignored, so one failed write does not abort the analysis.
     *
     * @param content text to write
     */
    public void writefile(String content) {
        try {
            os.write(content);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
单字符Keyword可随意增加,双字符keyword需要修改代码,这样也有了一点点可扩展性;
要想添加双字符keyword,得在case里面自己加,当然是得看懂代码前提下
有时间其实可以用设计模式之策略模式优化一下,当然了,肯定是没时间了
对了,这个还可以用有限状态机来写,这才是最正规的做法
可以搜搜 如何用代码实现有限状态机,这是字符串操作算法里面的一种思路,leetcode碰到就hard级了