C++词法扫描程序-编译原理实验一

程序实验功能:打开一个C++源文件,进行词法分析,输出代码中的Token及其类型;或者对C++源文件进行压缩,去掉不必要的空行、空格、缩进、注释等,生成一个压缩版的C++代码文件。再次对这个压缩后的代码文件进行词法分析,也能正确扫描出其中的Token。

界面如下:

C++词法扫描程序-编译原理实验一_第1张图片

 

 

实现的类如下:词法分析功能的类函数为:Scan(),可以返回:“词法Token+Token类型”,或者返回当前扫描出的Token

version 0.2

update:增加对源代码的压缩功能,以及订正一个特殊符号的识别

/// <summary>
/// Hand-written DFA lexer for C++ source text.
/// Each call to <see cref="Scan"/> consumes one token from the input and returns
/// "token + TAB + category label"; non-comment tokens are also appended to
/// <see cref="Tokens"/> so the caller can rebuild a whitespace/comment-free
/// ("compressed") version of the source.
/// </summary>
public class Scanner
{
    /// <summary>
    /// Tokens collected so far for the compressed output. Comments are skipped, and a
    /// single " " entry is inserted wherever two neighbouring tokens would otherwise
    /// merge into one when concatenated.
    /// </summary>
    public ArrayList Tokens = new ArrayList();

    /// <summary>
    /// Index of the last character consumed from <see cref="ContentString"/> (-1 before the first read).
    /// </summary>
    private int index;

    /// <summary>
    /// Length of the text being scanned.
    /// </summary>
    private int Length;

    /// <summary>
    /// The text being scanned.
    /// </summary>
    private string ContentString;

    /// <summary>
    /// States of the scanning DFA; also used to tag the category of a finished token.
    /// </summary>
    private enum DFAState
    {
        Start,
        Identifier,
        Number,
        Operator,
        Strings,
        Comment,
        Special,
        Done
    };

    /// <summary>
    /// Coarse classification of a single character: digit, letter or anything else.
    /// </summary>
    private enum CharType
    {
        digit,
        letter,
        other
    };

    /// <summary>
    /// Words reported as keywords instead of identifiers.
    /// </summary>
    private string[] ReservedWords = new string[]
    {
        "asm", "auto", "bool", "break", "case", "catch", "char", "class",
        "const", "const_cast", "continue", "default", "delete", "do", "double", "dynamic_cast", "else", "enum",
        "explicit", "export", "extern", "false", "float", "for", "friend", "goto", "if", "inline", "int", "long", "main",
        "mutable", "namespace", "new", "operator", "private", "protected", "public", "register", "reinterpret_cast",
        "return", "short", "signed", "sizeof", "static", "static_cast", "string", "struct", "switch", "template",
        "this", "throw", "true", "try", "typedef", "typeid", "typename", "union", "unsigned", "using", "virtual", "void",
        "volatile", "wchar_t", "while"
    };

    /// <summary>
    /// Characters that start an operator. '^' is required for "^=" to be reachable,
    /// and '?' covers the conditional operator.
    /// </summary>
    private char[] SingleOp = new char[]
    {
        '.', ':', '+', '-', '*', '/', '%', '<', '>', '=', '!', '&', '|', '^', '?'
    };

    /// <summary>
    /// Two-character operators; looked up only after the first character matched <see cref="SingleOp"/>.
    /// </summary>
    private string[] DoubleOp = new string[]
    {
        "->", "++", "--", "<<", ">>", "<=", ">=", "==", "!=", "&&", "||",
        "+=", "-=", "*=", "/=", "%=", "&=", "^=", "|=", "::"
    };

    /// <summary>
    /// Punctuation reported as "special symbol"; each is always a one-character token.
    /// </summary>
    private char[] SpecialChar = new char[] { '#', ',', ';', '(', ')', '[', ']', '{', '}', '~' };

    /// <summary>
    /// Sorted copy of <see cref="ReservedWords"/> used for binary search.
    /// </summary>
    private List<string> rwList = new List<string>();

    /// <summary>
    /// Sorted copy of <see cref="DoubleOp"/> used for binary search.
    /// </summary>
    private List<string> doblopList = new List<string>();

    /// <summary>
    /// Category of the most recently finished token (used by the compression output).
    /// </summary>
    private DFAState currFlag = DFAState.Start;

    /// <summary>
    /// Category of the last token that was appended to <see cref="Tokens"/>.
    /// </summary>
    private DFAState preFlag = DFAState.Start;

    /// <summary>
    /// Creates a scanner over the given source text.
    /// </summary>
    /// <param name="str">Content of the source file to analyse.</param>
    /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
    public Scanner(string str)
    {
        if (str == null)
        {
            throw new ArgumentNullException("str");
        }

        index = -1;
        ContentString = str;
        Length = str.Length;

        // Sort and search with the same ordinal comparer so the lookup does not
        // depend on the current culture.
        rwList.AddRange(ReservedWords);
        rwList.Sort(StringComparer.Ordinal);

        doblopList.AddRange(DoubleOp);
        doblopList.Sort(StringComparer.Ordinal);
    }

    /// <summary>
    /// Classifies a character as ASCII digit, ASCII letter or other.
    /// </summary>
    /// <param name="ch">Character to classify.</param>
    /// <returns>The character class.</returns>
    private CharType TellaChar(char ch)
    {
        if (ch >= '0' && ch <= '9')
        {
            return CharType.digit;
        }

        if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z'))
        {
            return CharType.letter;
        }

        // Operators, punctuation, whitespace, line breaks, everything else.
        return CharType.other;
    }

    /// <summary>
    /// Tells whether a character can start an operator.
    /// </summary>
    /// <param name="ch">Character to test.</param>
    /// <returns>true if the character is listed in <see cref="SingleOp"/>.</returns>
    private Boolean isSingleOp(char ch)
    {
        return Array.IndexOf(SingleOp, ch) >= 0;
    }

    /// <summary>
    /// Tells whether a two-character string is a known two-character operator.
    /// </summary>
    /// <param name="str">Candidate operator text.</param>
    /// <returns>true if the string is listed in <see cref="DoubleOp"/>.</returns>
    private Boolean isDoubleOp(string str)
    {
        return doblopList.BinarySearch(str, StringComparer.Ordinal) >= 0;
    }

    /// <summary>
    /// Tells whether a character is one of the special punctuation symbols.
    /// </summary>
    /// <param name="ch">Character to test.</param>
    /// <returns>true if the character is listed in <see cref="SpecialChar"/>.</returns>
    private Boolean isSpecialChar(char ch)
    {
        return Array.IndexOf(SpecialChar, ch) >= 0;
    }

    /// <summary>
    /// Tells whether at least one unread character remains after the current position.
    /// </summary>
    private Boolean HasMoreChars()
    {
        return index + 1 < Length;
    }

    /// <summary>
    /// Advances the read position by one and returns the character there.
    /// </summary>
    /// <returns>The next character of the input.</returns>
    /// <exception cref="IndexOutOfRangeException">
    /// The input is exhausted. The position is still advanced in that case.
    /// </exception>
    private char GetNextChar()
    {
        index++;
        return ContentString[index];
    }

    /// <summary>
    /// Tells whether an identifier is a reserved word.
    /// </summary>
    /// <param name="str">Identifier text.</param>
    /// <returns>true if the text is listed in <see cref="ReservedWords"/>.</returns>
    private Boolean reservedWordLookup(string str)
    {
        return rwList.BinarySearch(str, StringComparer.Ordinal) >= 0;
    }

    /// <summary>
    /// Tells whether a character can end an operand, i.e. a '+'/'-' written directly
    /// after it is a binary operator rather than the sign of a number.
    /// </summary>
    private Boolean EndsOperand(char ch)
    {
        CharType kind = TellaChar(ch);
        return kind == CharType.letter || kind == CharType.digit || ch == ')' || ch == ']';
    }

    /// <summary>
    /// Builds the result string for a finished identifier (keyword or plain identifier).
    /// </summary>
    private string DescribeIdentifier(string token)
    {
        return token + (reservedWordLookup(token) ? "\t关键字" : "\t标识符");
    }

    /// <summary>
    /// Finishes the token that is still in progress when the input ends, so it is
    /// reported instead of being lost. Sets <see cref="currFlag"/> accordingly.
    /// </summary>
    /// <param name="state">DFA state at the moment the input ran out.</param>
    /// <param name="token">Token text accumulated so far.</param>
    /// <returns>The result string for the token, or null for a state that holds no token.</returns>
    private string FinishAtEndOfInput(DFAState state, string token)
    {
        switch (state)
        {
            case DFAState.Identifier:
                currFlag = DFAState.Identifier;
                return DescribeIdentifier(token);
            case DFAState.Number:
                currFlag = DFAState.Number;
                return token + "\t数字";
            case DFAState.Operator:
                currFlag = DFAState.Operator;
                return token + "\t运算符";
            case DFAState.Strings:
                // Unterminated literal: report what was read.
                currFlag = DFAState.Strings;
                return token + "\t串";
            case DFAState.Comment:
                // Line comment without trailing newline, or unterminated block comment.
                currFlag = DFAState.Comment;
                return token + "\t注释";
            default:
                return null;
        }
    }

    /// <summary>
    /// Tells whether a " " must be placed between two neighbouring tokens in the
    /// compressed output so that they do not merge when concatenated.
    /// </summary>
    private static Boolean NeedsSeparator(DFAState previous, DFAState current)
    {
        if (current == DFAState.Special)
        {
            return false;
        }

        if (current == previous)
        {
            return true;
        }

        // "return 0" must not become "return0".
        Boolean previousIsWord = previous == DFAState.Identifier || previous == DFAState.Number;
        Boolean currentIsWord = current == DFAState.Identifier || current == DFAState.Number;
        return previousIsWord && currentIsWord;
    }

    /// <summary>
    /// Scans the next token from the input (version 0.2).
    /// </summary>
    /// <returns>
    /// The token text, a tab and its category label. Null is returned only when the
    /// scan loop is not entered because the read position is already past the input.
    /// </returns>
    /// <exception cref="IndexOutOfRangeException">
    /// No token could be started because only whitespace (or nothing) remained.
    /// This matches the original end-of-input behaviour.
    /// </exception>
    public string Scan()
    {
        string tkstString = null;                  // token + category label
        char ch;                                   // character being examined
        CharType chartype;                         // its coarse class
        string CurrentToken = null;                // token text accumulated so far
        DFAState CurrentState = DFAState.Start;

        while (index <= Length && CurrentState != DFAState.Done)
        {
            // Input ran out in the middle of a token: emit it rather than throwing.
            if (!HasMoreChars() && CurrentState != DFAState.Start)
            {
                tkstString = FinishAtEndOfInput(CurrentState, CurrentToken);
                CurrentState = DFAState.Done;
                break;
            }

            // In the Start state with nothing left this throws IndexOutOfRangeException.
            ch = GetNextChar();
            chartype = TellaChar(ch);

            switch (CurrentState)
            {
                case DFAState.Start:
                    if (char.IsWhiteSpace(ch))
                    {
                        CurrentState = DFAState.Start;
                    }
                    else if (ch == '_' || chartype == CharType.letter)
                    {
                        CurrentToken += ch;
                        CurrentState = DFAState.Identifier;
                    }
                    else if (chartype == CharType.digit)
                    {
                        CurrentToken += ch;
                        CurrentState = DFAState.Number;      // unsigned number
                    }
                    else if (ch == '+' || ch == '-')
                    {
                        CurrentToken += ch;
                        Boolean digitFollows = HasMoreChars()
                            && TellaChar(ContentString[index + 1]) == CharType.digit;

                        if (!digitFollows)
                        {
                            CurrentState = DFAState.Operator;
                        }
                        else if (index >= 1 && EndsOperand(ContentString[index - 1]))
                        {
                            // e.g. "b+3": the character right before the sign ends an
                            // operand, so the sign is a binary operator.
                            CurrentState = DFAState.Done;
                            tkstString = CurrentToken + "\t运算符";
                            currFlag = DFAState.Operator;
                        }
                        else
                        {
                            // Signed number such as +5 or -4 (also when the sign is the
                            // very first character of the input).
                            CurrentToken += GetNextChar();
                            CurrentState = DFAState.Number;
                        }
                    }
                    else if (ch == '/')
                    {
                        CurrentToken += ch;
                        if (HasMoreChars()
                            && (ContentString[index + 1] == '/' || ContentString[index + 1] == '*'))
                        {
                            CurrentToken += GetNextChar();
                            CurrentState = DFAState.Comment; // line or block comment
                        }
                        else
                        {
                            CurrentState = DFAState.Operator;
                        }
                    }
                    else if (ch == '\'' || ch == '"')
                    {
                        // Opening quote of a character or string literal.
                        CurrentToken += ch;
                        CurrentState = DFAState.Strings;
                    }
                    else if (isSingleOp(ch))
                    {
                        // Operators other than '+', '-' and '/'.
                        CurrentToken += ch;
                        CurrentState = DFAState.Operator;
                    }
                    else if (isSpecialChar(ch))
                    {
                        CurrentToken += ch;
                        CurrentState = DFAState.Done;
                        tkstString = CurrentToken + "\t特殊符号";
                        currFlag = DFAState.Special;
                    }
                    else
                    {
                        // Character the scanner has no rule for.
                        CurrentToken += ch;
                        tkstString = CurrentToken + "\t\t程序未处理的bug!";
                        CurrentState = DFAState.Done;
                    }
                    break;

                case DFAState.Identifier:
                    if (ch == '_' || chartype == CharType.letter || chartype == CharType.digit)
                    {
                        CurrentToken += ch;
                    }
                    else
                    {
                        index--;                             // give the character back
                        CurrentState = DFAState.Done;
                        currFlag = DFAState.Identifier;
                        tkstString = DescribeIdentifier(CurrentToken);
                    }
                    break;

                case DFAState.Number:
                    if (ch == 'E' || ch == 'e')
                    {
                        // Exponent marker, optionally followed by a sign.
                        CurrentToken += ch;
                        if (HasMoreChars()
                            && (ContentString[index + 1] == '+' || ContentString[index + 1] == '-'))
                        {
                            CurrentToken += GetNextChar();
                        }
                    }
                    else if (chartype == CharType.digit || ch == '.')
                    {
                        CurrentToken += ch;
                    }
                    else
                    {
                        index--;                             // give the character back
                        currFlag = DFAState.Number;
                        CurrentState = DFAState.Done;
                        tkstString = CurrentToken + "\t数字";
                    }
                    break;

                case DFAState.Operator:
                    if (isDoubleOp(CurrentToken + ch))
                    {
                        CurrentToken += ch;                  // two-character operator
                    }
                    else
                    {
                        index--;                             // single-character operator
                    }
                    CurrentState = DFAState.Done;
                    tkstString = CurrentToken + "\t运算符";
                    currFlag = DFAState.Operator;
                    break;

                case DFAState.Strings:
                    CurrentToken += ch;
                    if (ch == '\\')
                    {
                        // Escape sequence: the following character belongs to the
                        // literal even if it is a quote.
                        if (HasMoreChars())
                        {
                            CurrentToken += GetNextChar();
                        }
                        continue;                            // back to the while loop
                    }

                    // The literal ends at the same quote character that opened it.
                    if (ch == CurrentToken[0])
                    {
                        tkstString = CurrentToken + "\t串";
                        CurrentState = DFAState.Done;
                        currFlag = DFAState.Strings;
                    }
                    break;

                case DFAState.Comment:
                    if (CurrentToken[1] == '*')
                    {
                        // Block comment: ends at the first "*/". Only consume the '/'
                        // when it really follows, so "**/" is recognised too.
                        CurrentToken += ch;
                        if (ch == '*' && HasMoreChars() && ContentString[index + 1] == '/')
                        {
                            CurrentToken += GetNextChar();
                            CurrentState = DFAState.Done;
                            tkstString = CurrentToken + "\t注释";
                            currFlag = DFAState.Comment;
                        }
                    }
                    else
                    {
                        // Line comment: ends at the newline, which is consumed but not kept.
                        if (ch == '\n')
                        {
                            CurrentState = DFAState.Done;
                            tkstString = CurrentToken + "\t注释";
                            currFlag = DFAState.Comment;
                        }
                        else
                        {
                            CurrentToken += ch;
                        }
                    }
                    break;

                default:
                    // Should be unreachable.
                    CurrentToken = "ERROR SCAN HERE";
                    tkstString = CurrentToken + "\t出错了";
                    CurrentState = DFAState.Done;
                    break;
            }
        }

        // Record the token for the compressed output; comments are dropped.
        if (CurrentToken != null && currFlag != DFAState.Comment)
        {
            if (NeedsSeparator(preFlag, currFlag))
            {
                Tokens.Add(" ");
            }
            preFlag = currFlag;
            Tokens.Add(CurrentToken);
        }

        return tkstString;
    }

    /// <summary>
    /// Reports whether the read position is still within (or at the end of) the input.
    /// </summary>
    /// <returns>true while the read position is not greater than the input length.</returns>
    /// <remarks>
    /// NOTE(review): despite the name, this returns true while scanning has NOT gone
    /// past the input. The behaviour is kept unchanged because callers are not visible
    /// here; confirm against the call sites before renaming or inverting it.
    /// </remarks>
    public Boolean isFinishScan()
    {
        return index <= Length;
    }
}

 

可执行文件(Lexical_Scanner.rar)下载:http://cid-780607117452312e.office.live.com/self.aspx/.Public/Lexical%20Scanner.rar

 

 

实验二链接:http://blog.csdn.net/popozhu/archive/2010/06/28/5700229.aspx

你可能感兴趣的:(C++,list,String,null,Class,token)