选择部分C语言的语法成分,设计其词法分析程序,要求能够识别关键字、运算符、分界符、标识符、常量(至少是整型常量,可以自己扩充识别其他常量)等,并能处理注释、部分复合运算符(如>=等)。单词以二元式形式输出、输出有词法错误的单词及所在行号。
(1)待分析的简单的语法
关键字:begin if then while do end …
运算符和界符: := + - * / < <= > >= <> = == ; ( ) # , …
其他单词是标识符id和整型常数num,通过以下正规式定义:
id=l(l|d)* (l:letter d:digit)
num=dd*
空格、注释:在词法分析中要去掉。
(2)各种单词符号对应的种别编码(参考这张表,可以不同)
(3)待分析的源程序:
(a)int main()
{
int a=1,b=2;
b/a;
/* 注释部分*/
b>a;
c=a+b;
cout<<c;
}
(b)这个待分析程序有词法错误
while ((a+15)>0)
{
if (2x = = 7)
i3=z;
}
#### 单词符号(关键字,界符,运算符)对应的种别编码表的初始化
// Keyword table. iskeyword() returns the index of a match and judge() prints
// that index as the token's kind code. Slot 0 is an empty placeholder so that
// index 0 can double as "not a keyword"; slots 9-11 hold single-space fillers.
string keyword[14] = {
    "",    "begin", "if", "then", "while", "do",     "end",
    "int", "main",  " ",  " ",    " ",     "return", "cout"
};

// Operator / delimiter table: each row is { lexeme, kind code as text }.
// issym() returns the row index of a match.
// NOTE(review): judge() prints integer constants with kind code 20, the same
// code listed here for "<<" - confirm whether that overlap is intended.
string symbol[26][2] = {
    {":=", "18"}, {"=", "19"}, {"<<", "20"}, {"==", "21"},
    {"+", "22"},  {"-", "23"}, {"*", "24"},  {"/", "25"},
    {"\0", "1000"},
    {"(", "26"},  {")", "27"}, {"[", "28"},  {"]", "29"}, {"{", "30"}, {"}", "31"},
    {",", "32"},  {":", "33"}, {";", "34"},
    {">", "35"},  {"<", "36"}, {">=", "37"}, {"<=", "38"}, {"!=", "40"},
    {"\"", "41"}, {"#", "0"},  {"!", "-1"}
};

// Character currently being processed by the scanner.
char buf;
// Buffer holding the text of the word being assembled.
char word[32];
// Current line number.
int line = 1;
// Current column number.
int row = 0;
// Looks `s` up in the keyword table, ignoring the case of ASCII letters.
// Returns the index of the first matching entry of `keyword`, or 0 when no
// entry matches (entry 0 is the empty string, so 0 is also what an empty `s`
// yields).
int iskeyword(string s)
{
    // Fold 'A'..'Z' to lower case in the local copy before comparing.
    for (string::size_type pos = 0; pos < s.length(); ++pos)
    {
        const char ch = s[pos];
        if ('A' <= ch && ch <= 'Z')
            s[pos] = ch + 32;
    }

    // Linear scan; stops at the first slot whose text equals the folded word.
    const int slotCount = 14;
    int slot = 0;
    while (slot < slotCount && !(keyword[slot] == s))
        ++slot;

    return (slot < slotCount) ? slot : 0;
}
// Reports whether the global current character `buf` is an ASCII letter
// ('a'..'z' or 'A'..'Z').
bool isletter()
{
    const bool lowerCase = ('a' <= buf) && (buf <= 'z');
    const bool upperCase = ('A' <= buf) && (buf <= 'Z');
    return lowerCase || upperCase;
}
// Reports whether the global current character `buf` is a decimal digit
// ('0'..'9').
bool isnum()
{
    return !(buf < '0' || buf > '9');
}
// Looks `s` up in the first column of the operator/delimiter table `symbol`.
// Returns the row index of the first match, or 0 when nothing matches.
// Row 0 of the table is ":=", so a return value of 0 is ambiguous: it is
// produced both for ":=" and for an unknown lexeme.
int issym(string s)
{
    int hit = 0;
    for (int entry = 0; entry < 26; ++entry)
    {
        if (symbol[entry][0] == s)
        {
            hit = entry;
            break;
        }
    }
    return hit;
}
void judge()
{
fstream file;
file.open("C:\\Users\\lenovo\\Desktop\\test.txt");
if (file.fail())
{
cout << "文件操作失败" << endl;
return;
}
while ((buf = file.get()) != EOF)//文件读取完毕,则退出循环
{
int ii = 0;//存放单词的数组的下标
row++;
if (buf == '\n')//跳过空格和回车
{
line++;
row = 0;
//考虑到每次循环列数先加1在判断,所以赋初值为0;这样判断时就是从第一列开始。
continue;
}
else if (buf == ' ')
{
continue;
}
/********是数字*******/
else if (isnum() == true)//首字母是数字,则该单词为数字
{
word[ii] = buf;
while ((buf = file.get()) != EOF && isnum() == true)
{
word[++ii] = buf;
row++;
}
file.seekg(-1, SEEK_CUR);
//将文件的读指针从当前位置后移一个字节,因为多读了一个不是数字的字符;
// row--;
word[++ii] = '\0';
buf = file.get();
row++;
if (isletter() == true)
{
cout << "error: 在第" << line << "行有错误,错误单词:";
word[ii] = buf;
while ((isletter() == true) || (isnum() == true))
//若标识符以数字打头,则该单词有词法错误
{
buf = file.get();
ii++;
word[ii] = buf;
row++;
}
file.seekg(-1, SEEK_CUR);
//将文件的读指针从当前位置后移一个字节,因为多读了一个不是数字的字符;
// row--;
word[ii] = '\0';
cout << word << endl;
continue;
}
else
{
cout << "( 20 ,"<<word<<" )"<<endl;
}
file.seekg(-1, SEEK_CUR);
row--;
continue;
}
/***********************/
/******是标识符或者关键字******/
else if (buf == '_' || isletter() == true)//以下划线和字母开头的为关键字或标识符
{
word[ii] = buf;
while (((buf = file.get()) != EOF) && ((isletter() == true) || (isnum() == true) || (buf == '_')))
{
word[++ii] = buf;
row++;
}
file.seekg(-1, SEEK_CUR);
// row--;//因为最后一次读入没有进入循环,所以不需要-1;
//将文件的读指针从当前位置后移一个字节,因为多读了一个不是标识符或者关键字的字符;
word[++ii] = '\0';
int kind = iskeyword(word);
if (kind!= 0)//是关键字
{
cout << "( " << kind << " , " << word << " )" << endl;
}
else {
cout << "( 10 , " << word << " )" << endl;
}
continue;
}
//判断是否是注释或者除号
else if (buf == '/')//注释或者除号
{
buf = file.get();
row++;
if (buf != '*' && buf != '/')//是除号
{
file.seekg(-1, SEEK_CUR);
cout << "( 25 , / )" << endl;
row--;
continue;
}
else
{
while (1) {
buf = file.get();
row++;
while (buf != '*'&&buf != '/')//注释 “/* */”或者“// //”
//如果没有碰到注释结束符“*/”中的“*”或者“//”中的“/”,则接着跳过注释部分
{
buf = file.get();
if (buf == '\n')
{
line++;
row = 1;
}
else {
row++;
}
}
if ((buf = file.get()) == '/')//注释部分结束
{
row++;
break;
continue;
}
file.seekg(-1, SEEK_CUR);
//”*”为注释中的内容,所以回退一个字符,再循环判断是否结束。
// row--;
}
continue;
}
}
/*****判断是否是界符或者运算符***/
else
{
word[ii] = buf;
while (((buf = file.get()) != EOF) && ((buf == '=') || (buf == '<')||(buf == '>')))
//判断是否是“<=””>=””<<””>>””==”;
{
row++;
word[++ii] = buf;
}
file.seekg(-1, SEEK_CUR);
word[++ii] = '\0';
int kind = issym(word);
if ( kind!= 0)//是界符或者运算符
{ cout << "( " << symbol[kind][1] << " , " << symbol[kind][0] << " )" << endl;
}
else
{
while (((buf = file.get()) != EOF) && (buf != ' ') && (buf != '\n'))
{
cout << buf <<ii <<endl;
word[ii] = buf;
ii++;
row++;
}
word[ii] = '\0';
file.seekg(-1, SEEK_CUR);
cout << "error: 在第" << line << "行有错误,错误单词:" << word << endl;;
continue;
}
}
}
file.close();
}