1.在判断空行的时候,java里面用 line == "" 不好使,调试发现进不去if,然后用line.equals("")就好使。原因是 == 比较的是两个引用是否指向同一个对象,而 equals 比较的才是字符串的内容。
2.java格式化输出,可以有:System.out.printf("%-10s\t<ERROR:标识符重复!>\n",token);这种写法!printf啊,但是它不能直接输出到文件中。不过我们可以这么写:
output.write(String.format("%-10s\t<%s,-->",token,token));
String.format 救了我们哦~~
3.输出到文件中怎么换行呢? output.write("空行~\r\n"); 呵呵,win下是\r\n哦,linux下\n。。。
4.传说中的符号表的C语言代码实现:http://blog.163.com/ppt_compiler/blog/static/20281300720125120041966/
===================================================================================
如何解读这个看起来很糟糕的基本没啥注释的代码呢?
1.看清楚结构,结构如下:
(1)读入一行line,把line转成char[] 的strLine数组,然后每次处理一个字符ch(看红色代码,所有的处理都在for里面)。
(2)然后对每个ch进行分类:if else if else if 。。。建议每次看一个if{}就不会头晕啦
2.看清楚算法,这个是基于很精巧的“状态转移图”的程序,我拿个数字处理的代码讲解下:
那么我们就建立个二维数组来实现这个状态的转移:
123456
1 d.#e##
2 ##d###
3 ##de##
4 ####-d
5 #####d
6 #####d
我们忽略0状态,因为我们已经进入了。
状态1到状态1有矢量连接,所以数组d[1][1] = 'd'
状态1到状态2有矢量连接,所以数组d[1][2] = '.'
依次类推,没有矢量的就标为'#',然后关键代码如下:
int s = 1;
Boolean isfloat = false;
while (ch != '\0' && (isDigit(ch) || ch == '.' || ch == 'e' || ch == '-')) {
    if (ch == '.' || ch == 'e')
        isfloat = true;
    int k;
    for (k = 1; k <= 6; k++) {
        char tmpstr[] = digitDFA[s].toCharArray();
        if (ch != '#' && 1 == in_digitDFA(ch, tmpstr[k])) {
            token += ch;
            s = k;
            break;
        }
    }
    if (k > 6)
        break;
    ch = strLine[++i];
}
当循环退出的时候(当前状态找不到可用的转移,即 k > 6,或者遇到了不属于数字的字符),s 就是最终状态:当状态为 1、3、6 的时候是正常退出,
为 2、4、5 的时候是有错误地退出。
=====================================================================
我的code.txt:
int a="a; main() { int b =99A1; int a= 999; int c='a'; int abc = "hahah"; /*你妹啊*/ //你好啊 print("Hello World!\n");//你又好了 return 0;/*你妹啊*/ }
我的输出:
line : 1 int <int,--> a <标识符,(a,入口:0)> = <=,--> "a ERROR:字符串常量引号不封闭 ; <;,--> line : 2 main <标识符,(main,入口:1)> ( <(,--> ) <),--> line : 3 { <{,--> line : 4 int <int,--> b <标识符,(b,入口:2)> = <=,--> 99A1 ERROR:请确保实常数输入正确 ; <;,--> line : 5 int <int,--> a <ERROR:标识符重复!> = <=,--> 999 <实型常量,999> ; <;,--> line : 6 int <int,--> c <标识符,(c,入口:3)> = <=,--> 'a' <字符常量,a> ; <;,--> line : 7 int <int,--> abc <标识符,(abc,入口:4)> = <=,--> "hahah" <字符串常量,hahah> ; <;,--> line : 8 /*你妹啊*/ (注释:/*你妹啊*/) line : 9 //你好啊 (注释://你好啊) line : 10 空行~ line : 11 print <标识符,(print,入口:5)> ( <(,--> "Hello World!\n" <字符串常量,Hello World!\n> ) <),--> ; <;,--> //你又好了 (注释://你又好了) line : 12 return <return,--> 0 <实型常量,0> ; <;,--> /*你妹啊*/ (注释:/*你妹啊*/) line : 13 } <},-->
package ouyang; import java.io.*; import java.util.*; public class AnalysisCodeToWord { public static void main(String args[]) { String infile = "code.txt"; String outfile = "out.txt"; try { FileInputStream f = new FileInputStream(infile); BufferedReader dr = new BufferedReader(new InputStreamReader(f)); BufferedWriter output = new BufferedWriter(new FileWriter(outfile)); String line = ""; int cnt = 0; while ((line = dr.readLine()) != null) { cnt++; if (cnt == 1) { System.out.println("line : " + cnt); output.write(String.format("line : %d\r\n", cnt)); } else { System.out.println("\n\nline : " + cnt); output.write(String.format("\r\n\r\nline : %d\r\n", cnt)); } if (line.equals("")) { System.out.println("空行~"); output.write("空行~\r\n"); } else { char[] strLine = line.toCharArray(); for (int i = 0; i < strLine.length; i++) { char ch = strLine[i]; String token = ""; if (isAlpha(ch)) // 判断关键字和标识符 { do { token += ch; i++; if(i>=strLine.length) break; ch = strLine[i]; } while (ch != '\0' && (isAlpha(ch) || isDigit(ch))); --i; // 指针回退 if (isMatchKeyword(token.toString())) // 是关键字 { System.out.printf("%-10s\t<%s,-->\n", token, token); output.write(String.format( "%-10s\t<%s,-->\r\n", token, token)); } else // 是标识符 { if (symbol.isEmpty() || (!symbol.isEmpty() && !symbol .containsKey(token))) { symbol.put(token, symbol_pos); System.out.printf( "%-10s\t<标识符,(%s,入口:%d)>\n", token, token, symbol_pos); output.write(String.format( "%-10s\t<标识符,(%s,入口:%d)>\r\n", token, token, symbol_pos)); symbol_pos++; } else { System.out.printf( "%-10s\t<ERROR:标识符重复!>\n", token); output .write(String .format( "%-10s\t<ERROR:标识符重复!>\r\n", token)); } } token = ""; } else if (isDigit(ch)) // 判断数字常量 { int s = 1; Boolean isfloat = false; while (ch != '\0' && (isDigit(ch) || ch == '.' || ch == 'e' || ch == '-')) { if (ch == '.' 
|| ch == 'e') isfloat = true; int k; for (k = 1; k <= 6; k++) { char tmpstr[] = digitDFA[s].toCharArray(); if (ch != '#' && 1 == in_digitDFA(ch, tmpstr[k])) { token += ch; s = k; break; } } if (k > 6) break; i++;if(i>=strLine.length) break; ch = strLine[i]; } // if(ch) --i; // 指针回退 Boolean haveMistake = false; if (s == 2 || s == 4 || s == 5) { haveMistake = true; } else // 1,3,6 { if (!isOp(ch) || ch == '.') haveMistake = true; } if (haveMistake) // 错误处理 { while (ch != '\0' && ch != ',' && ch != ';' && ch != ' ') // 一直到“可分割”的字符结束 { token += ch; i++;if(i>=strLine.length) break; ch = strLine[i]; } System.out.printf("%-10s\tERROR:请确保实常数输入正确\n", token); output.write(String.format( "%-10s\tERROR:请确保实常数输入正确!\r\n", token)); } else { if (isfloat) { System.out.printf("%-10s\t<实型常量,%s>\n", token, token); output.write(String.format( "%-10s\t<实型常量,%s>\r\n", token, token)); } else { System.out.printf("%-10s\t<实型常量,%s>\n", token, token); output.write(String.format( "%-10s\t<整型常量,%s>\r\n", token, token)); } } --i; token = ""; } else if (ch == '\'') // 识别字符常量,类似处理字符串常量。 { int s = 0; Boolean haveMistake = false; String token1 = ""; token1 += ch; while (s != 3) { i++;if(i>=strLine.length) break; ch = strLine[i]; if (ch == '\0') { haveMistake = true; break; } for (int k = 0; k < 4; k++) { char tmpstr[] = stConDFA[s].toCharArray(); if (in_sinStConDFA(ch, tmpstr[k])) { token1 += ch; // 为输出 if (k == 2 && s == 1) { if (isEsSt(ch)) // 是转义字符 token = token + '\\' + ch; else token += ch; } else if (k != 3 && k != 1) token += ch; s = k; break; } } } if (haveMistake) { System.out.printf("%s\tERROR:字符常量引号不封闭\n", token1); output.write(String.format( "%s\tERROR:字符常量引号不封闭\r\n", token1)); --i; } else { if (token.length() == 1) { System.out.printf("%-10s\t<字符常量,%s>\n", token1, token); output.write(String.format( "%-10s\t<字符常量,%s>\r\n", token1, token)); } else if (token.length() == 2) { if (isEsSt(token.charAt(1)) && token.charAt(0) == '\\') { System.out.printf("%-10s\t<字符常量,%s>\n", token1, token); 
output.write(String.format( "%-10s\t<字符常量,%s>\r\n", token1, token)); } } } token = ""; } else if (ch == '"') // 处理字符串常量的 { String token1 = ""; token1 += ch; int s = 0; Boolean haveMistake = false; while (s != 3 ) { i++; if(i>=strLine.length-1) { haveMistake = true; break; } ch = strLine[i]; if (ch == '\0') { haveMistake = true; break; } for (int k = 0; k < 4; k++) { char tmpstr[] = stConDFA[s].toCharArray(); if (in_stConDFA(ch, tmpstr[k])) { token1 += ch; if (k == 2 && s == 1) { if (isEsSt(ch)) // 是转义字符 token = token + '\\' + ch; else token += ch; } else if (k != 3 && k != 1) token += ch; s = k; break; } } } if (haveMistake) { System.out.printf("%-10s\tERROR:字符串常量引号不封闭\n", token1); output.write(String.format( "%-10s\tERROR:字符串常量引号不封闭\n", token1)); --i; } else { System.out.printf("%-10s\t<字符串常量,%s>\n", token1, token); output .write(String.format( "%-10s\t<字符串常量,%s>\r\n", token1, token)); } token = ""; } else if (isOp(ch)) // 运算符,界符 { token += ch; if (isPlusEqu(ch)) // 后面可以用一个"=" { i++;if(i>=strLine.length) break; ch = strLine[i]; if (ch == '=') token += ch; else { if (isPlusSame(strLine[i - 1]) && ch == strLine[i - 1]) token += ch; // 后面可以用一个和自己一样的 else { --i; } } } System.out.printf("%-10s\t<%s,-->\n", token, token); output.write(String.format("%-10s\t<%s,-->\r\n", token, token)); token = ""; } else if (ch == '/') // 注释+除号: 注释只要识别出来就好。 { token += ch; i++;if(i>=strLine.length) break; ch = strLine[i]; if (ch != '*' && ch != '/') // 除号处理 { if (ch == '=') token += ch; // /= else { --i; // 指针回退 // / } System.out.printf("%-10s\t<%s,-->\n", token, token); output.write(String.format("%-10s\t<%s,-->\n", token, token)); token = ""; } else // 注释可能是‘//’也可能是‘/*’ { Boolean haveMistake = false; if (ch == '*') { token += ch; // ch == '*' int s = 2; while (s != 4) { i++;if(i>=strLine.length) break; ch = strLine[i]; // 注意判断溢出! 
if (ch == '\0') { haveMistake = true; break; } for (int k = 2; k <= 4; k++) { char tmpstr[] = noteDFA[s] .toCharArray(); if (1 == in_noteDFA(ch, tmpstr[k], s)) { token += ch; s = k; break; } } } } else if(ch == '/') //这里就不用状态转移了... { int index = line.lastIndexOf("//"); String tmpstr=line.substring(index); int tmpint = tmpstr.length(); for(int k=0;k<tmpint;k++) { i++; } token = tmpstr; } System.out.printf("%-10s\t", token); output.write(String.format("%-10s\t", token)); if (haveMistake) { System.out.printf("ERROR:注释没有封闭\n"); output.write("ERROR:注释没有封闭\r\n"); --i; } else { System.out.printf("(注释:%s)\n", token); output.write(String.format("(注释:%s)\n", token)); } token = ""; } } else // 一些很奇怪的字符 { if(ch != ' ' && ch != '\t') { System.out.printf("%-10c ERROR:存在不合法字符\n",ch); output.write(String.format("%-10c ERROR:存在不合法字符\n",ch)); } } } } } f.close(); dr.close(); output.close(); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } public static Boolean isAlpha(char ch) { return ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_'); } public static Boolean isDigit(char ch) { return (ch >= '0' && ch <= '9'); } public static Boolean isMatchKeyword(String str) { Boolean flag = false; for (int i = 0; i < 32; i++) { if (str.equals(keywords[i])) { flag = true; break; } } return flag; } public static Boolean isOp(char ch) // 判断是否是运算符 { for (int i = 0; i < 22; i++) if (ch == oper[i]) { return true; } return false; } public static int in_digitDFA(char ch, char dD) { if (dD == 'd') { if (isDigit(ch)) return 1; else return 0; } return (ch == dD) ? 
1 : 0; } public static Boolean in_stConDFA(char ch, char key) { if (key == 'a') return true; if (key == '\\') return ch == key; if (key == '"') return ch == key; if (key == 'd') return ch != '\\' && ch != '"'; return false; } public static Boolean in_sinStConDFA(char ch, char key) { if (key == 'a') return true; if (key == '\\') return ch == key; if (key == '"') return ch == '\''; if (key == 'd') return ch != '\\' && ch != '\''; return false; } public static Boolean isPlusEqu(char ch) // 运算符后可加等于 { return ch == '+' || ch == '-' || ch == '*' || ch == '/' || ch == '=' || ch == '>' || ch == '<' || ch == '&' || ch == '|' || ch == '^'; } public static Boolean isPlusSame(char ch) // 可以连续两个运算符一样 { return ch == '+' || ch == '-' || ch == '&' || ch == '|'; } public static Boolean isEsSt(char ch) { return ch == 'a' || ch == 'b' || ch == 'f' || ch == 'n' || ch == 'r' || ch == 't' || ch == 'v' || ch == '?' || ch == '0'; } public static int in_noteDFA(char ch, char nD, int s) { if (s == 2) { if (nD == 'c') { if (ch != '*') return 1; else return 0; } } if (s == 3) { if (nD == 'c') { if (ch != '*' && ch != '/') return 1; else return 0; } } return (ch == nD) ? 1 : 0; } public static String code = ""; public static Map<String, Integer> symbol = new HashMap<String, Integer>();// =new // HashMap<String,int>; public static int symbol_pos = 0; // 32个 public static String keywords[] = { "auto", "double", "int", "struct", "break", "else", "long", "switch", "case", "enum", "register", "typedef", "char", "extern", "return", "union", "const", "float", "short", "unsigned", "continue", "for", "signed", "void", "default", "goto", "sizeof", "volatile", "do", "if", "while", "static" }; // 7个 public static String digitDFA[] = { "#", "#d.#e##", "###d###", "###de##", "#####-d", "######d", "######d" }; // 22个 public static char oper[] = { '+', '-', '*', '=', '<', '>', '&', '|', '~', '^', '!', '(', ')', '[', ']', '{', '}', '%', ';', ',', '#', '.' 
}; // 4个 public static String stConDFA[] = { "#\\d#", "##a#", "#\\d\"", "####" }; // 4个 public static String noteDFA[] = { "#", "##*##", "##c*#", "##c*/", "#####" }; }