1. 理解词法分析在编译程序中的作用;
2. 掌握词法分析程序的实现方法和技术
输入源程序,逐字符扫描并分解字符串,依据状态转换图识别出对应的单词符号(关键字、标识符、运算符、界符、常量等)。
实验可以实现对关键字、特殊符号、运算符、常量的分析,常量可以识别出字符常量、字符串常量,同时可以删除程序中的注释。
实验亮点:对注释删除,识别字符和字符串常量,可以用于分析其他源程序,只需要改动符号链表即可
词法分析器以循环结构为主,每次读取c语言源代码文件中的数据进行判断,实现对c语言源程序的词法分析
Delimiter 界符
KeyWord 关键字
Operator 运算符
SpecialIdentifier 特殊符号
#include <stdio.h>
/*
书籍结构体
*/
struct Books
{
char title[50];
char author[50];
char subject[100];
int book_id;
} book = {"C 语言", "RUNOOB", "编程语言", 123456};
int main()
{
printf("title : %s\nauthor: %s\nsubject: %s\nbook_id: %d\n", book.title, book.author, book.subject, book.book_id);
}
特殊符 : #
关键字 : include
界符 : <
标识符 : stdio
运算符 : .
标识符 : h
界符 : >
关键字 : struct
标识符 : Books
界符 : {
关键字 : char
标识符 : title
界符 : [
常量 : 50
界符 : ]
特殊符 : ;
关键字 : char
标识符 : author
界符 : [
常量 : 50
界符 : ]
特殊符 : ;
关键字 : char
标识符 : subject
界符 : [
常量 : 100
界符 : ]
特殊符 : ;
关键字 : int
标识符 : book_id
特殊符 : ;
界符 : }
标识符 : book
运算符 : =
界符 : {
界符 : "
字符串 : C 语言
界符 : "
运算符 : ,
界符 : "
字符串 : RUNOOB
界符 : "
运算符 : ,
界符 : "
字符串 : 编程语言
界符 : "
运算符 : ,
常量 : 123456
界符 : }
特殊符 : ;
关键字 : int
标识符 : main
界符 : (
界符 : )
界符 : {
标识符 : printf
界符 : (
界符 : "
字符串 : title : %s\nauthor: %s\nsubject: %s\nbook_id: %d\n
界符 : "
运算符 : ,
标识符 : book
运算符 : .
标识符 : title
运算符 : ,
标识符 : book
运算符 : .
标识符 : author
运算符 : ,
标识符 : book
运算符 : .
标识符 : subject
运算符 : ,
标识符 : book
运算符 : .
标识符 : book_id
界符 : )
特殊符 : ;
界符 : }
Process finished with exit code 0
本次实验总体评价为优,结果与预期相符。实验主要用到 Java 文件操作,本身并不复杂,关键逻辑在于如何分析每一个单词。实验过程就是一个解决问题的过程:通过本次实验,我学到了词法分析器的原理,以及如何用程序实现词法分析器。略有遗憾的是没有实现对浮点数的分析。总体来说很不错,加油。
package main.experiment1;
import java.io.*;
import java.util.*;
/**
* @author Diminish
* @date 2022/4/9 7:46
*/
public class WordAnalysis {
/**
* 读取到数字或字母后所暂存的字符串
* */
private static String input = "";
private static int index = 1;
/**
* 记录引号个数
* */
private static int number = 0;
private static int single = 0;
private static boolean isTheSameLine = false;
private static boolean isText = false;
private static boolean isStar = false;
private static boolean lastIsStar = false;
/**
* @param path C文件路径
* @param result 分析结果
* */
public static void analyseToAllShowCodes (String path, Map<String, Pair<String, String>> result) {
try (Reader fileReader = new FileReader(path)) {
StringBuilder stringBuilder = new StringBuilder();
// 读取到的字符
int c;
while ((c = fileReader.read()) != -1) {
stringBuilder.append((char) c);
String ch = (char) c + "";
// 判断注释
if (isText) {
// 开启了注释
switch (ch) {
case "\n":
if (!isStar) {
isText = false;
}
break;
case "*":
lastIsStar = true;
break;
case "/":
if (lastIsStar && isStar) {
isText = false;
}
break;
default:
lastIsStar = false;
break;
}
continue;
}
// 判断是数字或者字母
else if (ch.matches(NUMBER_LETTER_REGULAR_EXPRESSION)) {
// 数字或字母加入到待处理的字符串input中
input += ch;
if (!OPERATOR_STACK.empty()) {
String operator = OPERATOR_STACK.pop();
MAP.put(index++ + "", new Pair<>("运算符", operator));
}
continue;
} else if (number % 2 != 0 && !"\"".equals(ch)) {
input += ch;
continue;
} else if (number % 2 != 0) {
MAP.put(index++ + "", new Pair<>("字符串", input));
input = "";
} else if (single % 2 != 0 && !"'".equals(ch)) {
input += ch;
continue;
} else if (single % 2 != 0) {
MAP.put(index++ + "", new Pair<>("字符", input));
input = "";
}
// 判断是不是空格
if (" ".equals(ch) || "\r".equals(ch) || "\n".equals(ch)) {
if (!"".equals(input)) {
isKeyWord();
}
if (!OPERATOR_STACK.empty()) {
String operator = OPERATOR_STACK.pop();
MAP.put(index++ + "", new Pair<>("运算符", operator));
}
if ("\n".equals(ch)) {
isTheSameLine = false;
}
}
// 不是字母或者数字就进入
else {
// 判断是不是特殊符号
if (SpecialIdentifier.isSpecialIdentifier(ch)) {
// 判断是不是关键字
isKeyWord();
if ("#".equals(ch)) {
isTheSameLine = true;
}
MAP.put(index++ + "", new Pair<>("特殊符", ch));
}
// 判断是不是运算符
if (Operator.isOperator(ch)) {
// 判断是不是关键字
isKeyWord();
boolean b = "<".equals(ch) || ">".equals(ch);
if (!isTheSameLine) {
// 大于小于号是界符
if (b) {
if (!OPERATOR_STACK.empty()) {
String halfOperator = OPERATOR_STACK.pop();
if (Operator.isOperator(halfOperator + ch)) {
MAP.put(index++ + "", new Pair<>("运算符", halfOperator + ch));
} else if (",".equals(halfOperator)) {
MAP.put(index++ + "", new Pair<>("运算符", halfOperator));
MAP.put(index++ + "", new Pair<>("运算符", ch));
} else {
error(halfOperator + ch, stringBuilder, "错误的运算符");
return;
}
} else {
OPERATOR_STACK.push(ch);
}
} else {
if (!OPERATOR_STACK.empty()) {
String halfOperator = OPERATOR_STACK.pop();
if (Operator.isOperator(halfOperator + ch)) {
MAP.put(index++ + "", new Pair<>("运算符", halfOperator + ch));
} else if (",".equals(halfOperator)) {
MAP.put(index++ + "", new Pair<>("运算符", halfOperator));
MAP.put(index++ + "", new Pair<>("运算符", ch));
} else if ("//".equals(halfOperator + ch)) {
isText = true;
} else if ("/*".equals(halfOperator + ch)) {
isText = true;
isStar = true;
} else {
error(halfOperator + ch, stringBuilder, "错误的运算符");
return;
}
} else {
OPERATOR_STACK.push(ch);
}
}
} else {
if (!b) {
if (!OPERATOR_STACK.empty()) {
String halfOperator = OPERATOR_STACK.pop();
if (Operator.isOperator(halfOperator + ch)) {
MAP.put(index++ + "", new Pair<>("运算符", halfOperator + ch));
} else if (",".equals(halfOperator)) {
MAP.put(index++ + "", new Pair<>("运算符", halfOperator));
MAP.put(index++ + "", new Pair<>("运算符", ch));
} else if ("/*".equals(halfOperator + ch) || "//".equals(halfOperator + ch)) {
// "/*".equals(halfOperator + ch) ||
isText = true;
} else {
error(halfOperator + ch, stringBuilder, "错误的运算符");
return;
}
} else {
OPERATOR_STACK.push(ch);
}
}
}
}
// 判断是不是界符
if (Delimiter.isDelimiter(ch)) {
// 判断是不是关键字
isKeyWord();
// 栈不空
if (!IDENTIFIER_STACK.empty()) {
// 判断c的类型, 是不是左括号
if (Delimiter.isLeftDelimiter(ch)) {
// 左界符入栈
if ("<".equals(ch) || ">".equals(ch)) {
if (isTheSameLine) {
MAP.put(index++ + "", new Pair<>("界符", ch));
IDENTIFIER_STACK.push(ch);
}
} else {
MAP.put(index++ + "", new Pair<>("界符", ch));
IDENTIFIER_STACK.push(ch);
}
}
// 判断是不是引号
else if (Delimiter.isNotOrientationDelimiter(ch)) {
if ("\"".equals(ch)) {
boolean inc = false;
if (number == 0) {
number++;
inc = true;
}
// 引号为奇数
if (number % 2 != 0) {
MAP.put(index++ + "", new Pair<>("界符", ch));
if (!inc) {
number = 0;
}
} else {
error(ch, stringBuilder, "引号不匹配");
return;
}
} else {
boolean inc = false;
if (single == 0) {
single++;
inc = true;
}
// 引号为奇数
if (single % 2 != 0) {
MAP.put(index++ + "", new Pair<>("界符", ch));
if (!inc) {
single = 0;
}
} else {
error(ch, stringBuilder, "引号不匹配");
return;
}
}
}
// 右界符
else {
String leftDelimiter = IDENTIFIER_STACK.peek();
// 判断是否匹配
if ("<".equals(ch) || ">".equals(ch)) {
if (isTheSameLine) {
if (Delimiter.isMatch(leftDelimiter, ch)) {
IDENTIFIER_STACK.pop();
MAP.put(index++ + "", new Pair<>("界符", ch + ""));
}
}
} else {
if (Delimiter.isMatch(leftDelimiter, ch)) {
IDENTIFIER_STACK.pop();
MAP.put(index++ + "", new Pair<>("界符", ch + ""));
}
}
}
}
// 空栈
else {
if ("<".equals(ch) || ">".equals(ch)) {
if (isTheSameLine) {
// 判断是不是引号
if (Delimiter.isNotOrientationDelimiter(ch)) {
// 是不是第一次出现引号
if (number == 0) {
MAP.put(index++ + "", new Pair<>("界符", ch));
number++;
} else {
error(ch, stringBuilder, "引号不匹配");
return;
}
}
// 判断是不是左界符
else if (!Delimiter.isLeftDelimiter(ch)) {
error(ch, stringBuilder, "括号不匹配");
return;
} else {
// 左界符入栈
if (isTheSameLine) {
MAP.put(index++ + "", new Pair<>("界符", ch));
IDENTIFIER_STACK.push(ch);
}
}
}
} else {
// 判断是不是引号
if (Delimiter.isNotOrientationDelimiter(ch)) {
if ("'".equals(ch)) {
// 是不是第一次出现引号
if (single == 0) {
MAP.put(index++ + "", new Pair<>("界符", ch));
single++;
} else {
if (single % 2 != 0) {
MAP.put(index++ + "", new Pair<>("界符", ch));
single = 0;
} else {
error(ch, stringBuilder, "引号不匹配");
return;
}
}
} else {
// 是不是第一次出现引号
if (number == 0) {
MAP.put(index++ + "", new Pair<>("界符", ch));
number++;
} else {
if (number % 2 != 0) {
MAP.put(index++ + "", new Pair<>("界符", ch));
number = 0;
} else {
error(ch, stringBuilder, "引号不匹配");
return;
}
}
}
}
// 判断是不是左界符
else if (!Delimiter.isLeftDelimiter(ch)) {
error(ch, stringBuilder, "括号不匹配");
return;
} else {
// 左界符入栈
MAP.put(index++ + "", new Pair<>("界符", ch));
IDENTIFIER_STACK.push(ch);
}
}
}
}
}
}
if (!IDENTIFIER_STACK.empty()) {
error("当前行", stringBuilder, "括号不匹配");
return;
}
System.out.println(stringBuilder);
result = MAP;
} catch (IOException e) {
System.out.println(e.getMessage());
}
}
/**
* @param path C文件路径
* @param result 分析结果
* */
public static void analyseToAllByFile (String path, Map<String, Pair<String, String>> result) {
try (Reader fileReader = new FileReader(path)) {
StringBuilder stringBuilder = new StringBuilder();
// 读取到的字符
int c;
while ((c = fileReader.read()) != -1) {
stringBuilder.append((char) c);
String ch = (char) c + "";
// 判断注释
if (isText) {
// 开启了注释
switch (ch) {
case "\n":
if (!isStar) {
isText = false;
}
break;
case "*":
lastIsStar = true;
break;
case "/":
if (lastIsStar && isStar) {
isText = false;
}
break;
default:
lastIsStar = false;
break;
}
continue;
}
// 判断是数字或者字母
else if (ch.matches(NUMBER_LETTER_REGULAR_EXPRESSION)) {
// 数字或字母加入到待处理的字符串input中
input += ch;
if (!OPERATOR_STACK.empty()) {
String operator = OPERATOR_STACK.pop();
MAP.put(index++ + "", new Pair<>("运算符", operator));
}
continue;
} else if (number % 2 != 0 && !"\"".equals(ch)) {
input += ch;
continue;
} else if (number % 2 != 0) {
MAP.put(index++ + "", new Pair<>("字符串", input));
input = "";
} else if (single % 2 != 0 && !"'".equals(ch)) {
input += ch;
continue;
} else if (single % 2 != 0) {
MAP.put(index++ + "", new Pair<>("字符", input));
input = "";
}
// 判断是不是空格
if (" ".equals(ch) || "\r".equals(ch) || "\n".equals(ch)) {
if (!"".equals(input)) {
isKeyWord();
}
if (!OPERATOR_STACK.empty()) {
String operator = OPERATOR_STACK.pop();
MAP.put(index++ + "", new Pair<>("运算符", operator));
}
if ("\n".equals(ch)) {
isTheSameLine = false;
}
}
// 不是字母或者数字就进入
else {
// 判断是不是特殊符号
if (SpecialIdentifier.isSpecialIdentifier(ch)) {
// 判断是不是关键字
isKeyWord();
if ("#".equals(ch)) {
isTheSameLine = true;
}
MAP.put(index++ + "", new Pair<>("特殊符", ch));
}
// 判断是不是运算符
if (Operator.isOperator(ch)) {
// 判断是不是关键字
isKeyWord();
boolean b = "<".equals(ch) || ">".equals(ch);
if (!isTheSameLine) {
// 大于小于号是界符
if (b) {
if (!OPERATOR_STACK.empty()) {
String halfOperator = OPERATOR_STACK.pop();
if (Operator.isOperator(halfOperator + ch)) {
MAP.put(index++ + "", new Pair<>("运算符", halfOperator + ch));
} else if (",".equals(halfOperator)) {
MAP.put(index++ + "", new Pair<>("运算符", halfOperator));
MAP.put(index++ + "", new Pair<>("运算符", ch));
} else {
error(halfOperator + ch, stringBuilder, "错误的运算符");
return;
}
} else {
OPERATOR_STACK.push(ch);
}
} else {
if (!OPERATOR_STACK.empty()) {
String halfOperator = OPERATOR_STACK.pop();
if (Operator.isOperator(halfOperator + ch)) {
MAP.put(index++ + "", new Pair<>("运算符", halfOperator + ch));
} else if (",".equals(halfOperator)) {
MAP.put(index++ + "", new Pair<>("运算符", halfOperator));
MAP.put(index++ + "", new Pair<>("运算符", ch));
} else if ("//".equals(halfOperator + ch)) {
isText = true;
} else if ("/*".equals(halfOperator + ch)) {
isText = true;
isStar = true;
} else {
error(halfOperator + ch, stringBuilder, "错误的运算符");
return;
}
} else {
OPERATOR_STACK.push(ch);
}
}
} else {
if (!b) {
if (!OPERATOR_STACK.empty()) {
String halfOperator = OPERATOR_STACK.pop();
if (Operator.isOperator(halfOperator + ch)) {
MAP.put(index++ + "", new Pair<>("运算符", halfOperator + ch));
} else if (",".equals(halfOperator)) {
MAP.put(index++ + "", new Pair<>("运算符", halfOperator));
MAP.put(index++ + "", new Pair<>("运算符", ch));
} else if ("/*".equals(halfOperator + ch) || "//".equals(halfOperator + ch)) {
// "/*".equals(halfOperator + ch) ||
isText = true;
} else {
error(halfOperator + ch, stringBuilder, "错误的运算符");
return;
}
} else {
OPERATOR_STACK.push(ch);
}
}
}
}
// 判断是不是界符
if (Delimiter.isDelimiter(ch)) {
// 判断是不是关键字
isKeyWord();
// 栈不空
if (!IDENTIFIER_STACK.empty()) {
// 判断c的类型, 是不是左括号
if (Delimiter.isLeftDelimiter(ch)) {
// 左界符入栈
if ("<".equals(ch) || ">".equals(ch)) {
if (isTheSameLine) {
MAP.put(index++ + "", new Pair<>("界符", ch));
IDENTIFIER_STACK.push(ch);
}
} else {
MAP.put(index++ + "", new Pair<>("界符", ch));
IDENTIFIER_STACK.push(ch);
}
}
// 判断是不是引号
else if (Delimiter.isNotOrientationDelimiter(ch)) {
if ("\"".equals(ch)) {
boolean inc = false;
if (number == 0) {
number++;
inc = true;
}
// 引号为奇数
if (number % 2 != 0) {
MAP.put(index++ + "", new Pair<>("界符", ch));
if (!inc) {
number = 0;
}
} else {
error(ch, stringBuilder, "引号不匹配");
return;
}
} else {
boolean inc = false;
if (single == 0) {
single++;
inc = true;
}
// 引号为奇数
if (single % 2 != 0) {
MAP.put(index++ + "", new Pair<>("界符", ch));
if (!inc) {
single = 0;
}
} else {
error(ch, stringBuilder, "引号不匹配");
return;
}
}
}
// 右界符
else {
String leftDelimiter = IDENTIFIER_STACK.peek();
// 判断是否匹配
if ("<".equals(ch) || ">".equals(ch)) {
if (isTheSameLine) {
if (Delimiter.isMatch(leftDelimiter, ch)) {
IDENTIFIER_STACK.pop();
MAP.put(index++ + "", new Pair<>("界符", ch + ""));
}
}
} else {
if (Delimiter.isMatch(leftDelimiter, ch)) {
IDENTIFIER_STACK.pop();
MAP.put(index++ + "", new Pair<>("界符", ch + ""));
}
}
}
}
// 空栈
else {
if ("<".equals(ch) || ">".equals(ch)) {
if (isTheSameLine) {
// 判断是不是引号
if (Delimiter.isNotOrientationDelimiter(ch)) {
// 是不是第一次出现引号
if (number == 0) {
MAP.put(index++ + "", new Pair<>("界符", ch));
number++;
} else {
error(ch, stringBuilder, "引号不匹配");
return;
}
}
// 判断是不是左界符
else if (!Delimiter.isLeftDelimiter(ch)) {
error(ch, stringBuilder, "括号不匹配");
return;
} else {
// 左界符入栈
if (isTheSameLine) {
MAP.put(index++ + "", new Pair<>("界符", ch));
IDENTIFIER_STACK.push(ch);
}
}
}
} else {
// 判断是不是引号
if (Delimiter.isNotOrientationDelimiter(ch)) {
if ("'".equals(ch)) {
// 是不是第一次出现引号
if (single == 0) {
MAP.put(index++ + "", new Pair<>("界符", ch));
single++;
} else {
if (single % 2 != 0) {
MAP.put(index++ + "", new Pair<>("界符", ch));
single = 0;
} else {
error(ch, stringBuilder, "引号不匹配");
return;
}
}
} else {
// 是不是第一次出现引号
if (number == 0) {
MAP.put(index++ + "", new Pair<>("界符", ch));
number++;
} else {
if (number % 2 != 0) {
MAP.put(index++ + "", new Pair<>("界符", ch));
number = 0;
} else {
error(ch, stringBuilder, "引号不匹配");
return;
}
}
}
}
// 判断是不是左界符
else if (!Delimiter.isLeftDelimiter(ch)) {
error(ch, stringBuilder, "括号不匹配");
return;
} else {
// 左界符入栈
MAP.put(index++ + "", new Pair<>("界符", ch));
IDENTIFIER_STACK.push(ch);
}
}
}
}
}
}
if (!IDENTIFIER_STACK.empty()) {
error("当前行", stringBuilder, "括号不匹配");
return;
}
if (!"".equals(input)) {
isKeyWord();
}
result = MAP;
} catch (IOException e) {
System.out.println(e.getMessage());
}
}
/**
* 输入是通过String的方式
* */
public static void analyseToAllByString (String expression, Map<String, Pair<String, String>> result) {
StringBuilder stringBuilder = new StringBuilder();
for (int l = 0; l < expression.length(); l++) {
// 读取到的字符
String c = expression.substring(l, l + 1);
stringBuilder.append(c);
// 判断注释
if (isText) {
// 开启了注释
switch (c) {
case "\n":
if (!isStar) {
isText = false;
}
break;
case "*":
lastIsStar = true;
break;
case "/":
if (lastIsStar && isStar) {
isText = false;
}
break;
default:
lastIsStar = false;
break;
}
continue;
}
// 判断是数字或者字母
else if (c.matches(NUMBER_LETTER_REGULAR_EXPRESSION)) {
// 数字或字母加入到待处理的字符串input中
input += c;
if (!OPERATOR_STACK.empty()) {
String operator = OPERATOR_STACK.pop();
MAP.put(index++ + "", new Pair<>("运算符", operator));
}
continue;
} else if (number % 2 != 0 && !"\"".equals(c)) {
input += c;
continue;
} else if (number % 2 != 0) {
MAP.put(index++ + "", new Pair<>("字符串", input));
input = "";
} else if (single % 2 != 0 && !"'".equals(c)) {
input += c;
continue;
} else if (single % 2 != 0) {
MAP.put(index++ + "", new Pair<>("字符", input));
input = "";
}
// 判断是不是空格
if (" ".equals(c) || "\r".equals(c) || "\n".equals(c)) {
if (!"".equals(input)) {
isKeyWord();
}
if (!OPERATOR_STACK.empty()) {
String operator = OPERATOR_STACK.pop();
MAP.put(index++ + "", new Pair<>("运算符", operator));
}
if ("\n".equals(c)) {
isTheSameLine = false;
}
}
// 不是字母或者数字就进入
else {
// 判断是不是特殊符号
if (SpecialIdentifier.isSpecialIdentifier(c)) {
// 判断是不是关键字
isKeyWord();
if ("#".equals(c)) {
isTheSameLine = true;
}
MAP.put(index++ + "", new Pair<>("特殊符", c));
}
// 判断是不是运算符
if (Operator.isOperator(c)) {
// 判断是不是关键字
isKeyWord();
boolean b = "<".equals(c) || ">".equals(c);
if (!isTheSameLine) {
// 大于小于号是界符
if (b) {
if (!OPERATOR_STACK.empty()) {
String halfOperator = OPERATOR_STACK.pop();
if (Operator.isOperator(halfOperator + c)) {
MAP.put(index++ + "", new Pair<>("运算符", halfOperator + c));
} else if (",".equals(halfOperator)) {
MAP.put(index++ + "", new Pair<>("运算符", halfOperator));
MAP.put(index++ + "", new Pair<>("运算符", c));
} else {
error(halfOperator + c, stringBuilder, "错误的运算符");
return;
}
} else {
OPERATOR_STACK.push(c);
}
} else {
if (!OPERATOR_STACK.empty()) {
String halfOperator = OPERATOR_STACK.pop();
if (Operator.isOperator(halfOperator + c)) {
MAP.put(index++ + "", new Pair<>("运算符", halfOperator + c));
} else if (",".equals(halfOperator)) {
MAP.put(index++ + "", new Pair<>("运算符", halfOperator));
MAP.put(index++ + "", new Pair<>("运算符", c));
} else if ("//".equals(halfOperator + c)) {
isText = true;
} else if ("/*".equals(halfOperator + c)) {
isText = true;
isStar = true;
} else {
error(halfOperator + c, stringBuilder, "错误的运算符");
return;
}
} else {
OPERATOR_STACK.push(c);
}
}
} else {
if (!b) {
if (!OPERATOR_STACK.empty()) {
String halfOperator = OPERATOR_STACK.pop();
if (Operator.isOperator(halfOperator + c)) {
MAP.put(index++ + "", new Pair<>("运算符", halfOperator + c));
} else if (",".equals(halfOperator)) {
MAP.put(index++ + "", new Pair<>("运算符", halfOperator));
MAP.put(index++ + "", new Pair<>("运算符", c));
} else if ("/*".equals(halfOperator + c) || "//".equals(halfOperator + c)) {
// "/*".equals(halfOperator + ch) ||
isText = true;
} else {
error(halfOperator + c, stringBuilder, "错误的运算符");
return;
}
} else {
OPERATOR_STACK.push(c);
}
}
}
}
// 判断是不是界符
if (Delimiter.isDelimiter(c)) {
// 判断是不是关键字
isKeyWord();
// 栈不空
if (!IDENTIFIER_STACK.empty()) {
// 判断c的类型, 是不是左括号
if (Delimiter.isLeftDelimiter(c)) {
// 左界符入栈
if ("<".equals(c) || ">".equals(c)) {
if (isTheSameLine) {
MAP.put(index++ + "", new Pair<>("界符", c));
IDENTIFIER_STACK.push(c);
}
} else {
MAP.put(index++ + "", new Pair<>("界符", c));
IDENTIFIER_STACK.push(c);
}
}
// 判断是不是引号
else if (Delimiter.isNotOrientationDelimiter(c)) {
if ("\"".equals(c)) {
boolean inc = false;
if (number == 0) {
number++;
inc = true;
}
// 引号为奇数
if (number % 2 != 0) {
MAP.put(index++ + "", new Pair<>("界符", c));
if (!inc) {
number = 0;
}
} else {
error(c, stringBuilder, "引号不匹配");
return;
}
} else {
boolean inc = false;
if (single == 0) {
single++;
inc = true;
}
// 引号为奇数
if (single % 2 != 0) {
MAP.put(index++ + "", new Pair<>("界符", c));
if (!inc) {
single = 0;
}
} else {
error(c, stringBuilder, "引号不匹配");
return;
}
}
}
// 右界符
else {
String leftDelimiter = IDENTIFIER_STACK.peek();
// 判断是否匹配
if ("<".equals(c) || ">".equals(c)) {
if (isTheSameLine) {
if (Delimiter.isMatch(leftDelimiter, c)) {
IDENTIFIER_STACK.pop();
MAP.put(index++ + "", new Pair<>("界符", c + ""));
}
}
} else {
if (Delimiter.isMatch(leftDelimiter, c)) {
IDENTIFIER_STACK.pop();
MAP.put(index++ + "", new Pair<>("界符", c + ""));
}
}
}
}
// 空栈
else {
if ("<".equals(c) || ">".equals(c)) {
if (isTheSameLine) {
// 判断是不是引号
if (Delimiter.isNotOrientationDelimiter(c)) {
// 是不是第一次出现引号
if (number == 0) {
MAP.put(index++ + "", new Pair<>("界符", c));
number++;
} else {
error(c, stringBuilder, "引号不匹配");
return;
}
}
// 判断是不是左界符
else if (!Delimiter.isLeftDelimiter(c)) {
error(c, stringBuilder, "括号不匹配");
return;
} else {
// 左界符入栈
if (isTheSameLine) {
MAP.put(index++ + "", new Pair<>("界符", c));
IDENTIFIER_STACK.push(c);
}
}
}
} else {
// 判断是不是引号
if (Delimiter.isNotOrientationDelimiter(c)) {
if ("'".equals(c)) {
// 是不是第一次出现引号
if (single == 0) {
MAP.put(index++ + "", new Pair<>("界符", c));
single++;
} else {
if (single % 2 != 0) {
MAP.put(index++ + "", new Pair<>("界符", c));
single = 0;
} else {
error(c, stringBuilder, "引号不匹配");
return;
}
}
} else {
// 是不是第一次出现引号
if (number == 0) {
MAP.put(index++ + "", new Pair<>("界符", c));
number++;
} else {
if (number % 2 != 0) {
MAP.put(index++ + "", new Pair<>("界符", c));
number = 0;
} else {
error(c, stringBuilder, "引号不匹配");
return;
}
}
}
}
// 判断是不是左界符
else if (!Delimiter.isLeftDelimiter(c)) {
error(c, stringBuilder, "括号不匹配");
return;
} else {
// 左界符入栈
MAP.put(index++ + "", new Pair<>("界符", c));
IDENTIFIER_STACK.push(c);
}
}
}
}
}
}
if (!IDENTIFIER_STACK.empty()) {
error("当前行", stringBuilder, "括号不匹配");
return;
}
if (!"".equals(input)) {
isKeyWord();
}
result = MAP;
}
/**
* 正则表达式, 判断是不是数字, 字母, 汉字, 汉字的unicode码在 \u4e00 - \u9fa5 之间, 需要转义
* */
private static final String NUMBER_LETTER_REGULAR_EXPRESSION = "\\d|[a-z]|[A-Z]|[\\u4e00-\\u9fa5]|_";
private static final Stack<String> IDENTIFIER_STACK = new Stack<>();
private static final Stack<String> OPERATOR_STACK = new Stack<>();
private static final Map<String, Pair<String, String>> MAP = new TreeMap<>(Comparator.comparingInt(Integer::parseInt));
private static void error (String ch, StringBuilder stringBuilder, String message) {
System.out.println("错误: " + ch);
System.out.println("错误位置: " + stringBuilder);
System.out.println("错误信息: " + message);
System.out.print("标识符栈: ");
IDENTIFIER_STACK.forEach(e -> System.out.print(e + ""));
System.out.println();
System.out.print("符号栈: ");
OPERATOR_STACK.forEach(e -> System.out.print(e + ""));
printAll();
}
public static void printAll () {
for (var i : MAP.entrySet()) {
System.out.println(i.getValue().toString());
}
}
public static void printAllConstants () {
for (var i : MAP.entrySet()) {
if ("常量".equals(i.getValue().getFirst())) {
System.out.println(i.getValue().toString());
}
}
}
private static void isKeyWord () {
// 判断是不是关键字
if (KeyWord.isKeyWord(input)) {
MAP.put(index++ + "", new Pair<>("关键字", input));
} else {
if (input.matches("\\d")) {
MAP.put(index++ + "", new Pair<>("常量", input));
} else if (!"".equals(input)) {
try {
Integer.parseInt(input);
MAP.put(index++ + "", new Pair<>("常量", input));
} catch (Exception e) {
MAP.put(index++ + "", new Pair<>("标识符", input));
}
}
}
input = "";
}
}
package main;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import java.util.ArrayList;
/**
 * Table of the words the lexer reports as keywords: the C keywords plus the preprocessor word
 * {@code include}.
 *
 * @author Diminish
 * @date 2022/4/9 7:57
 */
public class KeyWord {
    /** Keyword spellings, in the order returned by {@link #generateKeyWord()}. */
    private static final String[] WORDS = {
        "auto", "int", "double", "float", "long", "char", "signed", "unsigned", "short",
        "struct", "enum", "static", "switch", "case", "default", "break", "continue",
        "register", "const", "volatile", "typedef", "extern", "return", "void", "do",
        "while", "for", "if", "else", "goto", "sizeof", "include",
        // C keyword that was missing from the table.
        "union"
    };

    private static final ArrayList<String> KEY_WORDS = generateKeyWord();

    /**
     * Tells whether a word is a keyword.
     *
     * @param string the word to look up; null is never a keyword
     * @return true if the word is in the keyword table (case-sensitive)
     */
    public static boolean isKeyWord (String string) {
        return KEY_WORDS.contains(string);
    }

    /**
     * Builds a fresh, mutable list of all keywords.
     *
     * @return a new list that the caller may modify freely
     */
    public static ArrayList<String> generateKeyWord () {
        ArrayList<String> list = new ArrayList<>(WORDS.length);
        for (String word : WORDS) {
            list.add(word);
        }
        return list;
    }
}
package main;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import java.lang.reflect.Array;
import java.util.ArrayList;
/**
 * Delimiter table of the lexer: opening brackets, closing brackets and quote characters.
 *
 * @author Diminish
 * @date 2022/4/9 8:10
 */
@Getter
@Setter
@AllArgsConstructor
@NoArgsConstructor
public class Delimiter {
    // Opening and closing delimiters are paired by position: the i-th character of
    // LEFT_DELIMITERS is closed by the i-th character of RIGHT_DELIMITERS.
    private static final String LEFT_DELIMITERS = "([{<";
    private static final String RIGHT_DELIMITERS = ")]}>";
    private static final String QUOTE_DELIMITERS = "\"'";

    /**
     * Token category.
     * */
    private String type;
    /**
     * Delimiter text.
     * */
    private String delimiter;

    private static final ArrayList<String> DELIMITERS = generateDelimiter();

    /**
     * @param c a one-character string
     * @return true if {@code c} is any delimiter (bracket or quote); false for null
     */
    public static boolean isDelimiter (String c) {
        return DELIMITERS.contains(c);
    }

    /**
     * @param c a one-character string
     * @return true if {@code c} is one of the opening delimiters; false for null
     */
    public static boolean isLeftDelimiter (String c) {
        return indexIn(LEFT_DELIMITERS, c) >= 0;
    }

    /**
     * @param c a one-character string
     * @return true if {@code c} is a double or single quote, which has no direction; false for null
     */
    public static boolean isNotOrientationDelimiter (String c) {
        return indexIn(QUOTE_DELIMITERS, c) >= 0;
    }

    /**
     * Checks whether a closing delimiter closes the given opening delimiter.
     *
     * @param left the opening delimiter
     * @param now  the closing delimiter being examined
     * @return true only if {@code left} is an opening delimiter and {@code now} is its counterpart;
     *         false for anything else, including null
     */
    public static boolean isMatch (String left , String now) {
        int position = indexIn(LEFT_DELIMITERS, left);
        return position >= 0 && position == indexIn(RIGHT_DELIMITERS, now);
    }

    /**
     * Builds a fresh, mutable list of all delimiters: opening ones first, then closing ones,
     * then the quotes.
     *
     * @return a new list that the caller may modify freely
     */
    public static ArrayList<String> generateDelimiter () {
        String all = LEFT_DELIMITERS + RIGHT_DELIMITERS + QUOTE_DELIMITERS;
        ArrayList<String> list = new ArrayList<>(all.length());
        for (int i = 0; i < all.length(); i++) {
            list.add(String.valueOf(all.charAt(i)));
        }
        return list;
    }

    /** Position of the single character {@code c} in {@code alphabet}, or -1 if absent or not one character. */
    private static int indexIn (String alphabet, String c) {
        if (c == null || c.length() != 1) {
            return -1;
        }
        return alphabet.indexOf(c.charAt(0));
    }
}
package main;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import java.util.ArrayList;
import java.util.List;
/**
 * Table of the special symbols the lexer reports as their own token kind:
 * {@code #}, {@code ;} and {@code :}.
 *
 * @author Diminish
 * @date 2022/4/9 8:27
 */
public class SpecialIdentifier {
    private static final ArrayList<String> SPECIAL_IDENTIFIERS = generateSpecialIdentifier();

    /**
     * @param c a one-character string
     * @return true if {@code c} is a special symbol; false otherwise, including for null
     */
    public static boolean isSpecialIdentifier (String c) {
        return SPECIAL_IDENTIFIERS.contains(c);
    }

    /**
     * Builds a fresh, mutable list of the special symbols.
     *
     * @return a new list that the caller may modify freely
     */
    public static ArrayList<String> generateSpecialIdentifier () {
        return new ArrayList<>(List.of("#", ";", ":"));
    }
}
package main;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import java.util.ArrayList;
/**
 * Operator table of the lexer. It holds one- and two-character operators only; the comment
 * openers are deliberately absent so that the lexer can tell them apart from operators.
 *
 * @author Diminish
 * @date 2022/4/9 8:16
 */
public class Operator {
    /** Operator spellings, in the order returned by {@link #generateOperator()}. */
    private static final String[] SYMBOLS = {
        "=", "+", "-", "*", "/", "%",
        "++", "--",
        "+=", "-=", "*=", "/=", "%=",
        "==", "!=", "<=", ">=", "<", ">",
        "&&", "||", "!",
        "&", "|", "~",
        ".", ",",
        // Two-character C operators whose first character was already in the table.
        "->", "<<", ">>", "&=", "|="
    };

    private static final ArrayList<String> OPERATORS = generateOperator();

    /**
     * @param c a candidate operator of one or two characters
     * @return true if {@code c} is in the operator table; false otherwise, including for null
     */
    public static boolean isOperator (String c) {
        return OPERATORS.contains(c);
    }

    /**
     * Builds a fresh, mutable list of all operators.
     *
     * @return a new list that the caller may modify freely
     */
    public static ArrayList<String> generateOperator () {
        ArrayList<String> list = new ArrayList<>(SYMBOLS.length);
        for (String symbol : SYMBOLS) {
            list.add(symbol);
        }
        return list;
    }
}
package main;
/**
 * Display names of the token kinds produced by the lexer.
 *
 * @author Diminish
 * @date 2022/4/14 0:11
 */
public class Constants {
    // Kinds assigned to words.

    /** Reserved word. */
    public static final String KEYWORD = "关键字";
    /** User-defined name. */
    public static final String IDENTIFIER = "标识符";
    /** Numeric constant. */
    public static final String CONSTANT = "常量";

    // Kinds assigned to literals.

    /** Contents of a string literal. */
    public static final String STRING = "字符串";
    /** Contents of a character literal. */
    public static final String CHAR = "字符";

    // Kinds assigned to punctuation.

    /** Operator. */
    public static final String OPERATOR = "运算符";
    /** Bracket or quote. */
    public static final String DELIMITER = "界符";
    /** Special symbol. */
    public static final String SPECIAL_IDENTIFIER = "特殊符";
}
package main.experiment1;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
/**
 * Mutable two-element tuple; accessors and constructors are generated by Lombok.
 *
 * @param <T> type of the first element
 * @param <E> type of the second element
 * @author Diminish
 * @date 2022/4/9 9:00
 */
@Getter
@Setter
@AllArgsConstructor
@NoArgsConstructor
public class Pair<T, E> {
    T first;
    E second;

    /**
     * @return the two elements joined as {@code first : second}
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append(first);
        text.append(" : ");
        text.append(second);
        return text.toString();
    }
}
package main.experiment1;
import java.util.Comparator;
import java.util.Map;
import java.util.TreeMap;
/**
 * Small manual driver: analyses a one-line C snippet and prints the tokens that were found.
 *
 * @author Diminish
 * @date 2022/4/9 9:39
 */
public class Test {
    public static void main(String[] args) {
        // Keys are decimal sequence numbers, so they are ordered numerically rather than as text.
        Comparator<String> bySequenceNumber = Comparator.comparingInt(Integer::parseInt);
        Map<String, Pair<String, String>> tokens = new TreeMap<>(bySequenceNumber);

        // A whole file can be analysed instead by passing its path to
        // WordAnalysis.analyseToAllByFile and then calling WordAnalysis.printAll.
        String source = "main() {return 0;}";
        WordAnalysis.analyseToAllByString(source, tokens);
        WordAnalysis.printAll();
    }
}