“单词”分类说明
标识符(Identifier):变量名和函数名(字母或下划线开头);
关键字(Keyword):系统保留字;
运算符(Operator): + - * / % = == != < <= > >= 等;
分隔符(Separator): , ; . ' " ( ) [ ] { } // /* */ #等;
常量(Constant): 字符串或字符常量;
注释(Note): 注释不参与编译。
程序清单:
/*
========================================================
词法分析器V1.0
(DFA,C语言描述,可分析C/C++词法)
Author:Estrong
2012.05.10
========================================================
*/
/*
说明:
标识符(Identifier):变量名和函数名(字母或下划线开头);
关键字(Keyword):具有特定功能的系统保留字,不能移作他用;
运算符(Operator): + - * / % = == != < <= > >= 等;
分隔符(Separator): , ; . ' " ( ) [ ] { } // /米 米/ #等;
常量(Constant):字符串或字符常量;
注释(Note): 注释不参与编译。
*/
#include "stdio.h"
#include "string.h"
#include <stdlib.h>
/* Size of the token buffer used by the analyzer, and the element counts
   of the Keywords / Operators / Separators tables defined below. */
#define MAX_STR 256
#define MAX_KEYWORDS 62
#define MAX_OPERATORS 12
#define MAX_SEPARATORS 15
/* States of the lexical-analysis automaton */
#define STA_START 1
#define STA_IDorKEYWORD 2 /* IDENTIFIER: scanning an identifier or keyword */
#define STA_NUMBER 3 /* NUMBER: scanning a number */
#define STA_NOTE 4 /* NOTE: scanning a comment */
#define STA_CONSTANT 5 /* CONSTANT: scanning a string or character constant */
#define STA_DONE 6 /* DONE: the current token is complete */
/* Token types reported for a finished token */
#define TYPE_KEYWORD 1 /* KEYWORD: reserved word */
#define TYPE_IDENTIFIER 2 /* IDENTIFIER: identifier */
#define TYPE_NUMBER 3 /* NUMBER: number */
#define TYPE_NOTE 4 /* NOTE: comment */
#define TYPE_CONSTANT 5 /* CONSTANT: constant */
#define TYPE_OPERATOR 6 /* OPERATOR: operator */
#define TYPE_SEPARATOR 7 /* SEPARATOR: separator */
#define TYPE_ERROR 8 /* ERROR: error */
#define TYPE_UNKNOWN 9 /* UNKNOWN: unknown */
#define TYPE_ENDFILE 10 /* ENDFILE: end of file */
/* Operator spellings; MAX_OPERATORS entries. */
char *Operators[MAX_OPERATORS] = {"+","-","*","/","%","=","==","!=","<","<=",">",">="};
/* Separator spellings; MAX_SEPARATORS entries. */
char *Separators[MAX_SEPARATORS] ={",",";",".","\'","\"","(",")","[","]","{","}","//","/*","*/","#"};
/* Words reported as KEYWORD (C/C++ reserved words plus the preprocessor
   words "include" and "define"); MAX_KEYWORDS entries. */
char *Keywords[MAX_KEYWORDS] = {"include","define","auto","bool","break","case","catch","char","class",
"const","const_cast","continue","default","delete","do","double",
"dynamic_cast","else","enum","explicit","extern","false","float","for",
"friend","goto","if","inline","int","long","mutable","namespace","new",
"operator","private","protected","public","register","reinterpret_cast",
"return","short","signed","sizeof","static","static_cast","struct",
"switch","template","this","throw","true","try","typedef","typeid",
"typename","union","unsigned","using","virtual","void","volatile","while"};
/* 是否为运算符 */
int IsOperator(char c)
{
int i;
for(i=0;i if(Operators[i][0]==c) return 1; return 0; } /* 是否为分隔符 */ int IsSeparator(char c) { int i; for(i=0;i if(Separators[i][0]==c) return 1; return 0; } /* 是否为保留字 */ int IsKeyword(char *str) { int i; for(i=0;i if(strcmp(Keywords[i],str)==0) return 1; return 0; } /* DONE一次输出一次 */ void OutputOneDone(FILE *outf,int type,char *str) { if(IsKeyword(str)==1) type=TYPE_KEYWORD; switch(type) { case TYPE_KEYWORD: fprintf(outf," KEYWORD: ");break; case TYPE_IDENTIFIER: fprintf(outf," IDENTIFIER: ");break; case TYPE_NUMBER: fprintf(outf," NUMBER: ");break; case TYPE_NOTE: fprintf(outf," NOTE: ");break; case TYPE_CONSTANT: fprintf(outf," CONSTANT: ");break; case TYPE_OPERATOR: fprintf(outf," OPERATOR: ");break; case TYPE_SEPARATOR: fprintf(outf," SEPARATOR: ");break; case TYPE_ERROR: fprintf(outf," ERROR: ");break; case TYPE_UNKNOWN: fprintf(outf," UNKNOWN: ");break; default:break; } fprintf(outf,"%s\n",str); } /* DFA词法分析函数 */ void LexAnalyse(FILE *inf,FILE *outf) { char c; char str[MAX_STR];/* 过程字符串 */ int i; int line_no=1;/* 行号 */ int state,type; char flag_limit_one_line;/* 标志为 / * 注释 范围是一行 */ char flag_had_got_dot;/* 用于限定小数中只能有一个小数点 */ fprintf(outf,"Line %d--------------------------------------\n",line_no); while(!feof(inf)) { i=0; state=STA_START; flag_limit_one_line=0; flag_had_got_dot=0; while(state!=STA_DONE) { c=fgetc(inf); switch(state) { case STA_START: if( c==' ' || c=='\t'); else if(c=='\n') { line_no++; fprintf(outf,"Line %d--------------------------------------\n",line_no); } else if( (c>='a' && c<='z') || (c>='A' && c<='Z') || c=='_') { state=STA_IDorKEYWORD; type=TYPE_IDENTIFIER; str[i]=c; i++; } else if(c>='0' && c<='9') { state=STA_NUMBER; type=TYPE_NUMBER; str[i]=c; i++; } else if(c=='/') { str[i]=c; i++; c=fgetc(inf); if(c=='/') /* //注释 */ { flag_limit_one_line=1; state=STA_NOTE; type=TYPE_NOTE; str[i]=c; i++; } else if(c=='*') /* / * 注释开始 */ { state=STA_NOTE; type=TYPE_NOTE; str[i]=c; i++; } else { state=STA_DONE; type=TYPE_OPERATOR; fseek(inf, -1, 
SEEK_CUR);/* 文件流指针前移1个字节 */ i=1;/* str[1]='\0'; */ } } else if(c=='<' || c=='>') { state=STA_DONE; type=TYPE_OPERATOR; str[0]=c; c=fgetc(inf); if(c=='=') { str[1]='=';i=2;/* str[2]='\0' */ } else/* 暂未考虑<<,>>位移运算符的识别 */ { fseek(inf, -1, SEEK_CUR);/* 文件流指针前移1个字节 */ i=1;/* str[1]='\0'; */ } } else if(c=='!') { state=STA_DONE; str[0]=c; c=fgetc(inf); if(c=='=') /* 是!= */ { type=TYPE_OPERATOR; str[1]='=';i=2;/* str[2]='\0' */ } else /* 非!= */ { type=TYPE_UNKNOWN; fseek(inf, -1, SEEK_CUR);/* 文件流指针前移1个字节 */ i=1;/* str[1]='\0'; */ } } else if(c=='\"' || c=='\'') { state=STA_CONSTANT; type=TYPE_CONSTANT; str[0]=c; i=1; } else if(IsOperator(c)) { state=STA_DONE; type=TYPE_OPERATOR; str[0]=c; i=1;/* str[1]='\0'; */ } else if(IsSeparator(c)) { state=STA_DONE; type=TYPE_SEPARATOR; str[0]=c; i=1;/* str[1]='\0'; */ } else if(c==EOF) { state=STA_DONE; type=TYPE_ENDFILE; } else { state=STA_DONE; type=TYPE_UNKNOWN; } break;/* case STA */ case STA_IDorKEYWORD: if((c>='a' && c<='z') || (c>='A' && c<='Z') || (c>='0' && c<='9') || c=='_' ) { str[i]=c; i++; } else if(c=='.') { str[i]=c; i++; c=fgetc(inf); if((c>='a' && c<='z') || (c>='A' && c<='Z') || c=='_') { str[i]=c; i++; /* type=TYPE_IDENTIFIER; */ } else if(c==' ' || c=='\t' || c=='\n' || IsOperator(c) || IsSeparator(c)) { fseek(inf, -1, SEEK_CUR);/* 文件流指针前移1个字节 */ state=STA_DONE; type=TYPE_ERROR; /* 如 date. 或 date.1 标记为ERROR类型 */ } else { str[i]=c; i++; type=TYPE_ERROR; } } else if(c==' ' || c=='\t' || c=='\n' || IsOperator(c) || IsSeparator(c)) { state=STA_DONE; fseek(inf, -1, SEEK_CUR);/* 文件流指针前移1个字节 */ } else { state=STA_DONE; type=TYPE_ERROR; } break;/* case STA */ case STA_NUMBER: if(c>='0' && c<='9') { str[i]=c; i++; } else if(c=='.') /* 小数识别 */ { str[i]=c; i++; c=fgetc(inf); if(flag_had_got_dot==0) { if(c>='0' && c<='9') { str[i]=c; i++; /* type=TYPE_NUMBER; */ flag_had_got_dot=1; } else /* if(c==' ' || c=='\t' || c=='\n' || c>='0' && c<='9' || ...) 
*/ { fseek(inf, -1, SEEK_CUR);/* 文件流指针前移1个字节 */ state=STA_DONE; type=TYPE_ERROR; /* 如 date. 或 date.1 标记为ERROR类型 */ } } else { type=TYPE_ERROR; str[i]=c; i++; } } else if(c==' ' || c=='\t' || c=='\n' || IsOperator(c) || IsSeparator(c)) { state=STA_DONE; fseek(inf, -1, SEEK_CUR);/* 文件流指针前移1个字节 */ } else { str[i]=c; i++; type=TYPE_ERROR; } break;/* case STA */ case STA_NOTE: if(flag_limit_one_line==1) /* 是/ * 注释, 限定一行 */ { if(c=='\n') { state=STA_DONE; fseek(inf, -1, SEEK_CUR);/* 文件流指针前移1个字节 */ } else { str[i]=c; i++; } } else /* 是 / * 注释 */ { if(feof(inf)) { state=STA_DONE; type=TYPE_ERROR; } else if(c=='\n') { line_no++; str[i]=c; i++; } else if(c=='*') { str[i]=c; i++; c=fgetc(inf); if(c=='/') { state=STA_DONE; str[i]=c; i++; } else { if(feof(inf)) { state=STA_DONE; type=TYPE_ERROR; } fseek(inf, -1, SEEK_CUR);/* 文件流指针前移1个字节 */ } } else { str[i]=c; i++; } } break;/* case STA */ case STA_CONSTANT: if(feof(inf)) { state=STA_DONE; type=TYPE_ERROR; } else if(c=='\n') { line_no++; str[i]=c; i++; } else if(c=='\"' || c=='\'') { state=STA_DONE; str[i]=c; i++; } else if(c=='\\') { str[i]=c; i++; c=fgetc(inf); str[i]=c; i++; } else { str[i]=c; i++; } break;/* case STA */ case STA_DONE: break;/* case STA */ default: break;/* case STA */ } }/* state=STA_DONE */ str[i]='\0'; OutputOneDone(outf,type,str);/* DONE一次输出一次 */ }/* feof(inf) */ } /* 主函数 */ main() { FILE *input,*output; if((input=fopen("input.c","r"))==NULL) { printf("Cannot find the file!\nStrike any key to exit!\n"); system("pause"); exit(1); } else { output=fopen("output.c","w"); LexAnalyse(input,output); fprintf(output,"----------------------------------------------END OF FILE!\n"); fclose(input); fclose(output); printf("Lexical Analyzer has finished the analyzation!\nFor more information please see the file output.txt.\n"); system("pause"); } }