用了这么久的CSDN了,第一次写自己的博客,就从最近的编译原理实验开始吧,后续会把以前做过的一些实验也更新,希望能给学弟学妹们一些帮助,再当做记录自己的一些经历吧。大佬请出门右转(手动滑稽),走前留赞可否?
第一个实验老师说只占10%的比例,也就100来行代码就能解决,但奈何好久没有用过C/C++,写程序之前还特意回想了好久语法,最后写了两百多行才解决,大佬要是有闲心的话可在此基础上改一改。(文末有重点)
词法分析是对输入语句串中一个个单词符号进行分析,最后格式化输出种别码,类型,位置等信息。那么,就可以考虑一次读入一个字符将它们拼接成一个字符串,当碰到空格或者分界符(‘,’, ‘;’, ‘)’, ‘(’, ‘[’, ‘]’ )时,就把前面已读的字符串格式化输出,再输出当前分界符,然后再往后读下一个字符,直到程序尾部。
这个程序写得并不是很好,当时时间有限,就随便写写应付验收,但效果还是不错的。还可在此基础上修改修改,提高效率,缩减代码量,比如利用字典来索引种别码,把判断逻辑改为循环而不是多个if-else等。
#include <cctype>
#include <cstdio>
#include <iostream>
#include <string>
using namespace std;
// Forward declarations of the helpers defined after main().
int typeOfWord(string word);// category code of a token string
void printsub(string word, int row, int col); // formatted output of one token
bool isDivider(char c);// whether c is a delimiter character
bool isrela(char c);// whether c is a relational-operator character
bool isAlop(char c);// whether c is an arithmetic-operator character
// Lookup tables consulted by typeOfWord() to classify a token.
string k[8] = {
"do", "end", "for", "if", "printf", "scanf", "then", "while"};// keywords
string s[6] = {
",", ";", "(", ")", "[", "]"}; // delimiters
string aop[4] = {
"+", "-", "*", "/"}; // arithmetic operators
string rop[6] = {
"<", "<=", "=", ">", ">=", "<>"};// relational operators
string identi[10] = {
"a", "i", "n"}; // known identifiers; the 7 slots without an initializer are empty strings
// Prints `token` through printsub() when it is non-empty, then clears it and
// advances the per-line token position `col`. Empty tokens are ignored so that
// repeated separators never produce a bogus output row.
static void flushToken(string &token, int row, int &col)
{
    if (token.empty())
        return;
    printsub(token, row, col);
    token.clear();
    col += 1;
}

// Entry point: reads "source.txt" one character at a time, groups the
// characters into tokens and prints one classified row per token.
//
// Two buffers are used:
//   - inString collects letters, digits and relational-operator characters;
//   - alString collects a run of consecutive arithmetic-operator characters,
//     so a sequence such as "++" is handed to printsub() as a single token.
// Note that relational characters share inString with letters and digits, so
// they must be separated from operands by blanks to form their own token.
//
// Returns 0 on success and -1 when the input file cannot be opened.
int main()
{
    int row = 1, col = 1; // start counting at line 1, position 1
    string inString, alString;

    FILE *fp = fopen("source.txt", "r");
    if (fp == NULL) // the file could not be opened
    {
        printf("file open failed.\n");
        return -1;
    }
    cout << "单词\t二元序列\t类型\t位置(行,列)\n";
    cout << "\t(单词种别,单词属性)\n";

    // fgetc() returns int; keeping the result in an int is the only reliable
    // way to tell EOF apart from a real byte.
    int ch;
    while ((ch = fgetc(fp)) != EOF)
    {
        const char c = static_cast<char>(ch);

        if (isAlop(c)) // arithmetic operator: ends the pending word
        {
            flushToken(inString, row, col);
            alString += c;
            continue;
        }

        // Any other character ends a pending operator run, which keeps the
        // output in source order.
        flushToken(alString, row, col);

        if (isalnum(ch) || isrela(c)) // letter, digit or relational character
        {
            inString += c;
        }
        else if (c == ' ' || c == '\t' || c == '\r') // blank separator
        {
            flushToken(inString, row, col);
        }
        else if (c == '\n') // line break: finish the token, then move to the next line
        {
            flushToken(inString, row, col);
            row += 1;
            col = 1;
        }
        else if (isDivider(c)) // delimiter: emit the pending word, then the delimiter itself
        {
            flushToken(inString, row, col);
            inString = c;
            flushToken(inString, row, col);
        }
        else // anything else is an illegal character
        {
            flushToken(inString, row, col);
            const string bad(1, c);
            printf("%6s\t%6s\t%9s\t(%d,%d)\n", bad.c_str(), "Error", "Error", row, col);
            col += 1;
        }
    }

    // Emit whatever is still buffered when the file ends without a separator.
    flushToken(inString, row, col);
    flushToken(alString, row, col);

    fclose(fp);
    return 0;
}
// Returns true when `word` equals one of the first `count` entries of `table`.
static bool containsWord(const string *table, int count, const string &word)
{
    for (int i = 0; i < count; i++)
    {
        if (table[i] == word)
            return true;
    }
    return false;
}

// Classifies a token and returns its category code:
//   1 keyword, 2 delimiter, 3 arithmetic operator, 4 relational operator,
//   5 unsigned number (digits only), 6 known identifier, -1 anything else.
// The tables are checked in that order and the first match wins.
int typeOfWord(string word)
{
    // An empty string is not a token. Without this guard it would pass the
    // "all characters are digits" test vacuously and be reported as a number.
    if (word.empty())
        return -1;

    if (containsWord(k, int(sizeof(k) / sizeof(k[0])), word))
        return 1;
    if (containsWord(s, int(sizeof(s) / sizeof(s[0])), word))
        return 2;
    if (containsWord(aop, int(sizeof(aop) / sizeof(aop[0])), word))
        return 3;
    if (containsWord(rop, int(sizeof(rop) / sizeof(rop[0])), word))
        return 4;

    // Unsigned number: every character must be a digit.
    bool allDigits = true;
    for (string::size_type i = 0; i < word.size(); i++)
    {
        // Cast first: passing a negative char to isdigit() is undefined.
        if (!isdigit(static_cast<unsigned char>(word[i])))
        {
            allDigits = false;
            break;
        }
    }
    if (allDigits)
        return 5;

    if (containsWord(identi, int(sizeof(identi) / sizeof(identi[0])), word))
        return 6;

    return -1;
}
// Formatted output: classifies `word` with typeOfWord() and prints one row
// holding the token, its (category, attribute) pair, the category label and
// the (row, col) position. Category -1 is printed as an error row; any other
// unexpected category prints nothing.
void printsub(string word, int row, int col)
{
    // Labels for categories 1..6, indexed by category - 1.
    static const char *const kLabels[] = {
        "关键字", "分界符", "算数运算符", "关系运算符", "常数", "标识符"};

    const int type = typeOfWord(word);
    if (type >= 1 && type <= 6)
    {
        printf("%6s\t(%d,%3s)\t%10s\t(%d,%d)\n", word.c_str(), type, word.c_str(), kLabels[type - 1], row, col);
    }
    else if (type == -1)
    {
        printf("%6s\t%6s\t%9s\t(%d,%d)\n", word.c_str(), "Error", "Error", row, col);
    }
}
// Reports whether `c` is one of the relational-operator characters < = >.
bool isrela(char c)
{
    return c == '<' || c == '=' || c == '>';
}
// Reports whether `c` is one of the arithmetic-operator characters + - * /.
bool isAlop(char c)
{
    switch (c)
    {
    case '+':
    case '-':
    case '*':
    case '/':
        return true;
    default:
        return false;
    }
}
// Reports whether `c` is one of the delimiter characters , ; ( ) [ ].
bool isDivider(char c)
{
    static const char kDividers[] = {',', ';', '(', ')', '[', ']'};
    const char *const end = kDividers + sizeof(kDividers);

    // Walk the table until a match is found or the end is reached.
    const char *p = kDividers;
    while (p != end && *p != c)
        ++p;
    return p != end;
}
这个实验最坑的是要把++运算符也识别为错误,验收时老师可能会问这是怎么实现的;还有就是把3b这种单词识别为错误是怎么实现的,换成3b45之类的还能不能识别。就这两点需要注意。
如果这篇文章对你有帮助的话,能否动动小手打个赏呢?下一篇是实验二LL1分析。886