词法分析程序

内容:

选择部分C语言的语法成分,设计其词法分析程序,要求能够识别关键字、运算符、分界符、标识符、常量(至少是整型常量,可以自己扩充识别其他常量)等,并能处理注释、部分复合运算符(如>=等)。单词以二元式形式输出、输出有词法错误的单词及所在行号。

要求:

(1)待分析的简单的语法
关键字:begin if then while do end …
运算符和界符::= + - * / < <= > >= <> = == ; ( ) # , …
其他单词是标识符id和整型常数num,通过以下正规式定义:
id=l(l|d)* (l:letter d:digit)
num=dd*
空格、注释:在词法分析中要去掉。
(2)各种单词符号对应的种别编码(参考这张表,可以不同)
词法分析程序_第1张图片
(3)待分析的源程序:
(a)int main()
{
int a=1,b=2;
b/a;
/* 注释部分*/
b>a;
c=a+b;
cout<<c<<endl;
return 0;
}
(b)这个待分析程序有词法错误
while ((a+15)>0)
{
if (2x = = 7)
i3=z;
}

核心代码

#### 单词符号(关键字,界符,运算符)对应的种别编码表的初始化
// Keyword table. iskeyword() returns the position of a match, and position 0
// doubles as "not a keyword", so slot 0 is an empty placeholder. The " "
// entries are unused placeholder slots.
string keyword[14] = {
	"", "begin", "if", "then", "while", "do", "end",
	"int", "main", " ", " ", " ", "return", "cout"
};

// Operator / delimiter table: one row per symbol, { spelling, category code }.
string symbol[26][2] = {
	{ ":=", "18" },
	{ "=", "19" },
	{ "<<", "20" },
	{ "==", "21" },
	{ "+", "22" },
	{ "-", "23" },
	{ "*", "24" },
	{ "/", "25" },
	{ "\0", "1000" },
	{ "(", "26" },
	{ ")", "27" },
	{ "[", "28" },
	{ "]", "29" },
	{ "{", "30" },
	{ "}", "31" },
	{ ",", "32" },
	{ ":", "33" },
	{ ";", "34" },
	{ ">", "35" },
	{ "<", "36" },
	{ ">=", "37" },
	{ "<=", "38" },
	{ "!=", "40" },
	{ "\"", "41" },
	{ "#", "0" },
	{ "!", "-1" }
};

char buf;       // character currently being processed
char word[32];  // buffer holding the token being assembled
int line = 1;   // current line number
int row = 0;    // current column number
  1. 功能说明:查表判断该字符串是否为关键字
  2. 输入:要判断的字符串
  3. 输出:如果为关键字返回其对应的种别编码,不是关键字返回0。
// Looks s up in the keyword table, ignoring ASCII case.
//
// s is taken by value so it can be lower-cased locally without touching the
// caller's string. Returns the position of the first matching entry in
// keyword, or 0 when there is no match (an empty s matches slot 0 and so also
// yields 0).
int iskeyword(string s)
{
	// Fold ASCII upper-case letters to lower case before comparing.
	for (string::size_type pos = 0; pos != s.length(); ++pos)
	{
		const char ch = s[pos];
		if (ch >= 'A' && ch <= 'Z')
			s[pos] = ch + ('a' - 'A');
	}

	// Scan upwards so the lowest matching slot wins.
	int index = 0;
	while (index < 14)
	{
		if (keyword[index] == s)
			return index;
		++index;
	}
	return 0;
}

  1. 功能说明:判断当前字符是否为字母
  2. 输出:是字母返回true,不是则返回false
// Reports whether the current character (the global buf) is an ASCII letter.
bool isletter()
{
	const bool lower = ('a' <= buf && buf <= 'z');
	const bool upper = ('A' <= buf && buf <= 'Z');
	return lower || upper;
}
  1. 功能说明:判断当前字符是否为数字
  2. 输出:是数字返回true,不是则返回false
// Reports whether the current character (the global buf) is a decimal digit.
bool isnum()
{
	return !(buf < '0' || buf > '9');
}
  1. 功能说明:查表判断当前单词是否是界符或者运算符
  2. 输入:要进行判断的单词
  3. 输出:如果是界符或者运算符,返回其在symbol表中的下标(对应的种别编码为symbol[下标][1]),不是则返回0。注意":="位于下标0,其返回值与"未找到"相同。
// Looks s up in the first column of the symbol table.
//
// Returns the row index of the first matching spelling; the category code is
// then symbol[index][1]. Returns 0 when nothing matches.
// NOTE(review): row 0 holds ":=", so a successful match of ":=" returns the
// same value as "not found"; callers cannot tell the two apart from the
// return value alone.
int issym(string s)
{
	int index = 0;
	while (index < 26)
	{
		if (symbol[index][0] == s)
			return index;
		++index;
	}
	return 0;
}
  1. 功能说明:打开“test.txt”文件,若文件打不开,输出错误信息,直接返回;否则判断文件中所有的单词。判断过程:循环依次判断是否为换行、空格(若是,则跳过),数字(若标识符以数字打头,则输出错误信息),关键字,标识符,注释或是除号(若是注释则跳过),界符或运算符;以上都不是,则输出错误信息。
  2. 输出:二元组(种别编码,单词),若有错误,则输出错误所在的行号及错误单词。
void judge()
{
	fstream file;
	file.open("C:\\Users\\lenovo\\Desktop\\test.txt");
	if (file.fail())
	{
		cout << "文件操作失败" << endl;
		return;
	}
	while ((buf = file.get()) != EOF)//文件读取完毕,则退出循环
	{
		int ii = 0;//存放单词的数组的下标
		row++;
		if (buf == '\n')//跳过空格和回车
		{
			line++;
			row = 0;
//考虑到每次循环列数先加1在判断,所以赋初值为0;这样判断时就是从第一列开始。
			continue;
		}
		else if (buf == ' ')
		{	
			continue;
		}
		/********是数字*******/
		else if (isnum() == true)//首字母是数字,则该单词为数字
		{
			word[ii] = buf;
			while ((buf = file.get()) != EOF && isnum() == true)
			{
				word[++ii] = buf;
				row++;
			}
			file.seekg(-1, SEEK_CUR);
			//将文件的读指针从当前位置后移一个字节,因为多读了一个不是数字的字符;
		    //	row--;
			word[++ii] = '\0';
			buf = file.get();
			row++;
			if (isletter() == true)
			{	
				cout << "error: 在第" << line << "行有错误,错误单词:";
				word[ii] = buf;
				while ((isletter() == true) || (isnum() == true))
//若标识符以数字打头,则该单词有词法错误
				{
					buf = file.get();
					ii++;
					word[ii] = buf;
					row++;
				}
				file.seekg(-1, SEEK_CUR);
				//将文件的读指针从当前位置后移一个字节,因为多读了一个不是数字的字符;
			    //	row--;
				word[ii] = '\0';
				cout << word << endl;
				continue;
			}
			else
			{
			cout << "( 20 ,"<<word<<" )"<<endl;
 }
			file.seekg(-1, SEEK_CUR);
			row--;
			continue;
		}
		/***********************/
	   /******是标识符或者关键字******/
		else if (buf == '_' || isletter() == true)//以下划线和字母开头的为关键字或标识符
		{
			word[ii] = buf;
			while (((buf = file.get()) != EOF) && ((isletter() == true) || (isnum() == true) || (buf == '_')))
			{
				word[++ii] = buf;
				row++;
			}
			file.seekg(-1, SEEK_CUR);
		//	row--;//因为最后一次读入没有进入循环,所以不需要-1;
			//将文件的读指针从当前位置后移一个字节,因为多读了一个不是标识符或者关键字的字符;
			word[++ii] = '\0';
			int kind = iskeyword(word);
			if (kind!= 0)//是关键字
			{
				cout << "( " << kind << " , " << word << " )" << endl;
			}
			else {
				cout << "( 10 , " << word << " )" << endl;
			}
			continue;
		}
		//判断是否是注释或者除号
		else if (buf == '/')//注释或者除号
		{
			buf = file.get();
			row++;
			if (buf != '*' && buf != '/')//是除号
			{
				file.seekg(-1, SEEK_CUR);
				cout << "( 25 , / )" << endl;
				row--;
				continue;
			}
			else
			{
					while (1) {
						buf = file.get();
						row++;
						while (buf != '*'&&buf != '/')//注释 “/*  */”或者“// //”
//如果没有碰到注释结束符“*/”中的“*”或者“//”中的“/”,则接着跳过注释部分
						{
							buf = file.get();
							if (buf == '\n')
							{
								line++;
								row = 1;
							}
							else {
								row++;
							}
						}
						if ((buf = file.get()) == '/')//注释部分结束
						{
							row++;
							break;
							continue;
						}
						file.seekg(-1, SEEK_CUR);
//”*”为注释中的内容,所以回退一个字符,再循环判断是否结束。
						//	row--;
					}
				continue;
			}
		}
		/*****判断是否是界符或者运算符***/
		else
		{
			word[ii] = buf;
			while (((buf = file.get()) != EOF) && ((buf == '=') || (buf == '<')||(buf == '>')))
//判断是否是“<=””>=””<<””>>””==”;
			{
				row++;
				word[++ii] = buf;
			}
			file.seekg(-1, SEEK_CUR);
			word[++ii] = '\0';
			int kind = issym(word);
			if ( kind!= 0)//是界符或者运算符
			{	cout << "( " << symbol[kind][1] << " , " << symbol[kind][0] << " )" << endl;
			}
			else
			{
				while (((buf = file.get()) != EOF) && (buf != ' ') && (buf != '\n'))
				{
					cout << buf <<ii <<endl;
					word[ii] = buf;
					ii++;
					row++;
				}
				word[ii] = '\0';
				file.seekg(-1, SEEK_CUR);
				cout << "error: 在第" << line << "行有错误,错误单词:" << word << endl;;
				continue;
			}
		}
	}
	file.close();
}

运行截图

词法分析程序_第2张图片
词法分析程序_第3张图片

你可能感兴趣的:(c++,编译器)