自己写的版本:
问题:
1:考虑到由于是嵌套接收,浮点型感觉稍麻烦,因为还要判断是否出现两个小数点等,故没有实现
2:对于一些特殊符号,如三元运算符,格式控制符%d等分析不到位
3:类别应该分的更细,否则用唯一的symbol(sym)标识的话无法进行后续的语法分析
4:没有用文件指针,数据输入要在控制台,不利于交互
#include <iostream>
#include <string.h>
#include <cstdio>

// Hand-written lexer for a C-like language.
//
// Token category codes stored in `sym`:
//   -1  error (numeric literal larger than kMaxNumber)
//    0  end marker '#'
//    1  keyword
//    2  number      (value in `num`)
//    3  identifier  (text in `str`)
//    4  operator    (text in `str`)
//    5  delimiter   (text in `str`)

const int kSourceCapacity = 50000 + 10;   // size of the source buffer `s`
const int kLexemeCapacity = 64;           // size of the lexeme buffer `str`
const int kMaxNumber = (1 << 15) - 1;     // largest literal accepted (16-bit int)

// src_index: read position in `s` (was named `index`, which clashes with the
//            legacy index() function that <string.h> declares on glibc).
// sub_index: write position in `str`.
// num:       value of the last numeric literal.
// sym:       category code of the last token.
int src_index, sub_index, num, sym;
char ch;
char s[kSourceCapacity];    // whole input, terminated by '#' and then '\0'
char str[kLexemeCapacity];  // text of the last non-numeric token

// The 32 keywords of C89.
char keyword[32][10] = {
    "auto", "break", "case", "char", "int", "long", "double", "float", "const",
    "continue", "default", "do", "else", "enum", "extern", "for", "goto", "return",
    "sizeof", "static", "struct", "switch", "typedef", "union", "unsigned", "void", "while",
    "if", "short", "signed", "register", "volatile"
};

// True for the whitespace characters the lexer skips between tokens.
static bool is_blank(char c)
{
    return c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\f';
}

// True for an ASCII decimal digit.
static bool is_digit(char c)
{
    return c >= '0' && c <= '9';
}

// True for a character that may start an identifier.
static bool is_ident_start(char c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_';
}

// Appends one character to `str`; characters beyond the capacity are dropped
// so that an over-long lexeme can never overflow the buffer.
static void append_to_lexeme(char c)
{
    if (sub_index < kLexemeCapacity - 1) {
        str[sub_index++] = c;
    }
}

// Returns the set of characters that may follow `first` to form a
// two-character operator (e.g. '<' may be followed by '=' or '<').
static const char* operator_followers(char first)
{
    switch (first) {
    case '<': return "=<";
    case '>': return "=>";
    case '+': return "=+";
    case '-': return "=->";
    case '&': return "=&";
    case '|': return "=|";
    case '=':
    case '!':
    case '*':
    case '/':
    case '%':
    case '^': return "=";
    default:  return "";
    }
}

// Scans the next token from `s` starting at `src_index`.
//
// On return `sym` holds the category code; `num` holds the value for numbers
// and `str` holds the text for every other category. Whitespace and `//`
// line comments are skipped. The scanner never reads past the '#' end marker
// or the terminating '\0'.
void function()
{
    memset(str, 0, sizeof(str));
    sub_index = 0;

    // Skip blanks and line comments. A loop is used instead of recursion so
    // that many consecutive comment lines cannot exhaust the stack.
    for (;;) {
        while (is_blank(s[src_index])) {
            ++src_index;
        }
        if (s[src_index] == '/' && s[src_index + 1] == '/') {
            // Stop at the end marker too: a comment on the last line has no '\n'.
            while (s[src_index] != '\n' && s[src_index] != '#' && s[src_index] != '\0') {
                ++src_index;
            }
            continue;
        }
        break;
    }

    ch = s[src_index];

    // End of input: the '#' marker, or '\0' if the marker is missing.
    if (ch == '#' || ch == '\0') {
        if (ch == '#') {
            ++src_index;
        }
        sym = 0;
        str[0] = '#';
        return;
    }

    // Identifier or keyword.
    if (is_ident_start(ch)) {
        while (is_ident_start(s[src_index]) || is_digit(s[src_index])) {
            append_to_lexeme(s[src_index]);
            ++src_index;
        }
        sym = 3;
        const int keyword_count = static_cast<int>(sizeof(keyword) / sizeof(keyword[0]));
        for (int i = 0; i < keyword_count; i++) {
            if (!strcmp(str, keyword[i])) {
                sym = 1;
                break;
            }
        }
        return;
    }

    // Unsigned decimal integer. Accumulation stops once the limit is exceeded
    // so that arbitrarily long digit runs cannot overflow `num`.
    if (is_digit(ch)) {
        bool out_of_range = false;
        num = 0;
        while (is_digit(s[src_index])) {
            if (!out_of_range) {
                num = num * 10 + (s[src_index] - '0');
                if (num > kMaxNumber) {
                    out_of_range = true;
                }
            }
            ++src_index;
        }
        sym = out_of_range ? -1 : 2;
        return;
    }

    // Everything else is a one- or two-character symbol.
    ++src_index;
    append_to_lexeme(ch);

    if (strchr("();[],{}", ch) != NULL) {
        sym = 5;
        return;
    }

    sym = 4;
    const char next = s[src_index];
    // The '\0' guard is needed because strchr() also matches the terminator.
    if (next != '\0' && strchr(operator_followers(ch), next) != NULL) {
        append_to_lexeme(next);
        ++src_index;
    }
}

// Reads source text from standard input up to and including the first '#',
// then prints one line per token.
int main()
{
    //freopen("in.txt","r",stdin);
    //freopen("out.txt","w",stdout);
    std::cout << "Please input the program code:(over with'#') " << std::endl;

    // Two bytes are reserved for the '#' marker and the terminating '\0'.
    int length = 0;
    bool terminated = false;
    while (length < kSourceCapacity - 2 && std::cin.get(ch)) {
        s[length++] = ch;
        if (ch == '#') {
            terminated = true;
            break;
        }
    }
    if (!terminated) {
        // EOF or a full buffer: add the marker so the scanner always stops.
        s[length++] = '#';
    }
    s[length] = '\0';

    src_index = 0;
    do {
        function();
        if (sym == -1) {
            std::cout << "Error" << '\n';
        } else if (sym == 2) {
            std::cout << "[" << sym << "," << num << "]" << '\n';
        } else {
            std::cout << "[" << sym << "," << str << "]" << '\n';
        }
    } while (sym != 0);
    return 0;
}
看到一个别人实现的不错版本:
1 调用ctype虽然简易,但在一些类型符的判断上会出现一些小错误
2 txt文件光标不能直接放在文件末(当然通过改主函数也可以避免问题~)
3 所有标识符用二维数组处理很好,整齐简便,又有文件读写操作,健壮性强,值得学习。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#if defined(_WIN32)
/* Non-standard headers; <conio.h> declares the getch() used by main(). */
#include <malloc.h>
#include <conio.h>
#endif

/* NULL comes from the standard headers above. true/false are only defined
   for C builds; in C++ they are keywords and must not be redefined. */
#ifndef __cplusplus
#define true 1
#define false 0
#endif

/* Number of elements in a lookup table declared in this file. */
#define TABLE_SIZE(t) (sizeof(t) / sizeof((t)[0]))

FILE *fp; /* source file being analysed */
char ch;

/* Keywords */
const char *keyword[34]={"auto","break","case","char","const","continue","default","do","double",
                         "else","enum","extern","float","for","goto","if","int","long","register",
                         "return","short","signed","sizeof","static","struct","switch","typedef",
                         "printf",
                         "union","unsigned","void","volatile","while","main"};
/* Arithmetic operators */
const char *operatornum[6]={"+","-","*","/","++","--"};
/* Comparison operators */
const char *comparison[8]={"<","<=","=",">",">=","<>","==","!="};
/* Separators */
const char *interpunction[8]={",",";",":=",".","(",")","{","}"};
/* Special symbols */
const char *biaoshifu[6]={"%","$","^","&","_","#"};
/* Comment markers. Only two of the three slots are initialised, so the last
   one is NULL. NOTE(review): the empty string looks like a lost block-comment
   marker - confirm the intended entries. */
const char *zhushifu[3]={"//",""};
/* Logical operators */
const char *luoji[3]={"&&","||","!"};

/* Returns 1 when `word` equals one of the first `count` entries of `table`.
   NULL entries (slots without an initialiser) are skipped. */
static int table_contains(const char *const table[], size_t count, const char *word)
{
    size_t i;
    for (i = 0; i < count; ++i) {
        if (table[i] != NULL && strcmp(table[i], word) == 0)
            return 1;
    }
    return 0;
}

/*
 * Looks `searchstr` up in one of the token tables.
 *
 * wordtype: 1 keyword, 2 arithmetic operator, 3 comparison operator,
 *           4 separator, 5 special symbol, 6 comment marker,
 *           7 logical operator.
 * Returns true when the word is found in the selected table, false otherwise
 * (including for a NULL word or an unknown wordtype).
 */
char search(const char searchstr[],int wordtype)
{
    int found = 0;

    if (searchstr == NULL)
        return (false);

    /* Every case ends in break: a keyword lookup must not fall through into
       the arithmetic-operator table. */
    switch (wordtype)
    {
    case 1: found = table_contains(keyword, TABLE_SIZE(keyword), searchstr); break;
    case 2: found = table_contains(operatornum, TABLE_SIZE(operatornum), searchstr); break;
    case 3: found = table_contains(comparison, TABLE_SIZE(comparison), searchstr); break;
    case 4: found = table_contains(interpunction, TABLE_SIZE(interpunction), searchstr); break;
    case 5: found = table_contains(biaoshifu, TABLE_SIZE(biaoshifu), searchstr); break;
    case 6: found = table_contains(zhushifu, TABLE_SIZE(zhushifu), searchstr); break;
    case 7: found = table_contains(luoji, TABLE_SIZE(luoji), searchstr); break;
    default: break;
    }
    return (char)(found ? true : false);
}

/*
 * Reads an alphanumeric word that starts with `ch`, taking the following
 * characters from `fp`, and prints it as a keyword (code 1) or as a
 * user-defined identifier (code 2). "main" is in the keyword table but is
 * reported as an identifier.
 *
 * Words longer than the local buffer are consumed completely but only their
 * leading part is stored and printed.
 * Returns the first character read after the word.
 */
char letterprocess (char ch)
{
    char letter[64];
    size_t len = 0;

    /* The cast keeps bytes >= 0x80 from reaching isalnum() as negative
       values; the stream checks stop the loop once the file is exhausted. */
    while (!feof(fp) && !ferror(fp) && isalnum((unsigned char)ch)!=0)
    {
        if (len < sizeof(letter) - 1)
            letter[len++]=ch;
        ch=(char)fgetc(fp);
    }
    letter[len]='\0';

    if (search(letter,1) && strcmp(letter,"main")!=0)
        printf("<1,关键字,%s>\n",letter);
    else
        printf("<2,自定义标识符,%s>\n",letter);
    return(ch);
}
///////////////////////// char numberprocess(char ch)//数字处理程序 { int i=-1; char num[20]; while (isdigit(ch)!=0) { num[++i]=ch; ch=fgetc(fp); } if(isalpha(ch)!=0) { while(isspace(ch)==0) { num[++i]=ch; ch=fgetc(fp); } num[i+1]='\0'; printf("错误!非法标识符:%s\n",num); goto u; } num[i+1]='\0'; printf("<3,数字,%s>\n",num);//数字识别码3 u: return(ch); } ///////////////////////// char otherprocess(char ch) { int i=-1; char other[20]; if (isspace(ch)!=0) { ch=fgetc(fp); goto u; } while ((isspace(ch)==0)&&(isalnum(ch)==0)) { other[++i]=ch; ch=fgetc(fp); } other[i+1]='\0'; if (search(other,2)) printf("<4,运算符,%s>\n",other);//算术运算符识别码4 else if (search(other,3)) printf("<4,运算符,%s>\n",other);//关系运算符号 识别码4 else if (search(other,4)) printf("<5,分隔符号,%s>\n",other);//分隔符号识别码5 else if (search(other,5)) printf("<%s,特殊标识符号>\n",other); else if (search(other,6)) printf("<%s,注释符号>\n",other); else if (search(other,7)) printf("<%s,逻辑运算符号>\n",other); else printf("错误!非法字符:%s\n",other); u: return (ch); } /////////////////////////// void main () { char str,c; printf("**********************************词法分析器************************************\n"); if((fp=fopen("F:\\写的代码\\tt\\源程序.txt","r"))==NULL) printf("源程序无法打开!\n"); else { str =fgetc(fp); while (str!=EOF) { if (isalpha(str)!=0) str=letterprocess(str); else { if (isdigit(str)!=0) str=numberprocess(str); else str=otherprocess(str); } }; fclose(fp); fp=NULL; printf("词法分析结束,谢谢使用!\n"); printf("点任意键退出!\n"); } c=getch(); }