C语言词法分析程序的设计与实现

C语言词法分析程序 c++和lex两种实现 支持多种数字格式和转义字符

实现的功能

  1. 基本满足C语言的词法规则。
  2. 可以识别八进制,十六进制,浮点,科学计数法,同时支持后缀。
  3. 识别关键字。
  4. 识别字符和字符串中的转义。

C++实现程序设计说明

自动机实现

​ 一般自动机实现采用如下代码结构:

int state = 0;
switch(state){
    case 0:
        dosomeThing();
        state = NextState;
        break;
    case 1:
        ...
}

这样的结构确实很规范,但是我觉得用起来有一点问题。

  1. state = 2 这代表着跳转到状态2,这样的可读性不强,因为很难记住状态2是什么状态。可以把state变量改为enum来增强可读性。
  2. 实现状态机每个状态的跳转一般用switch语句,如果需要判断是<还是<=,就必须预读一个字符并为此增加状态,这样会出现三重switch嵌套,影响可读性。

所以我采用了一般不推荐的goto语句。只要保证实现的状态机没有逻辑错误,使用goto不会造成复杂的结构。

stateName1:
	dosomething;
	...
    goto stateName1;
	...
    goto stateName2;
stateName2:
	...

自动机举例

识别注释

/
*
/
=
其它
其它
*
其它
/
其它
'\n'
开始
/=
/
块注释
行注释

识别标识符

_\a-z\A-Z
_\a-z\A-Z\0-9
其它
开始
标识符

识别数字

l/L
l/L
u/U
u/U
其它
其它
其它
u/U
其它
其它
整数后缀
数字

实际使用msvc编译器发现,L和U的顺序其实没有限制,LuL也是合法的数字后缀。如果用自动机实现所有的情况,需要上面的自动机三个。所以实际实现并没有采用自动机,采用的是检测L和U出现的次数。

1-9
1-9
'\.'
E/e
十进制
整数后缀
浮点数
科学计数法
0
X/x
0-9a-fA-F
0-7
0-7
8/9
8/9
l/L/u/U
l/L/u/U
'\.'
'\.'
e/E
e/E
e/E
0-9
'-/+'/0-9
0-9
f/F/l/L
f/F/l/L
f/F/l/L
'\.'
开始
十六进制
八进制
temp
处理整数后缀
浮点数
科学计数法
处理浮点后缀

'\.'代表小数点,因为我不会在图里打小数点的转义

三个图是连起来的,名字相同的是同一个状态。处理后缀是专门的统计后缀每个字符出现的次数的处理程序,没有用自动机。

识别字符

'
除了',\,'\n'
'
\
转义字符
0-7
X/x
'
0-7
0-7
0-7
0-9a-fA-F
0-9a-fA-F
字符

转义有\n,\t,还有\0,\000三位八进制和\xhh两位十六进制

处理运算符

运算符的处理就比较简单了,只需要看后面的字符能否与它组成复合运算符或者变成其它运算符。比如<开头的运算符有<,<=,<<,<<=这几种,自动机如下

<
其它
=
<
其它
=
开始
<
<=
<<
<<=

需要注意的是/,它既是注释的开头字符,也是/,/=的开头。

关键字

采用stl的unordered_map来保存关键字,识别到标识符之后,判断该标识符是否在map中,如果在,则是关键字。

错误的处理

遇到不符合自动机规则的字符之后,输出一个错误信息,然后跳过该字符串或者数字,也就是一直跳过字符,直到遇到空白符或运算符。

lex实现

lex实现主要是把每一类单词转变成一个正则表达式,代码如下:

/* Character classes shared by the token definitions below. */
digit 		[0-9]
digit8		[0-7]
digit16		[0-9a-fA-F]
/* Integer suffix: at most one u/U and up to two l/L, in any of the three orders. */
postfix		((u|U)?(l|L)?(l|L)?)|((l|L)?(u|U)?(l|L)?)|((l|L)?(l|L)?(u|U)?)
/* Floating suffix. */
postfixf	(l|L)|(f|F)
letter 		[A-Za-z_]
/* Line comment: everything up to, but not including, the newline. */
note 		"//"[^\n]*
/* Block comment. The body is any run of non-stars, or stars followed by a
   character that is neither '*' nor '/'; the terminator is one or more stars
   and a slash, so comments ending in "**" followed by "/" are matched too. */
notes 		"/*"([^*]|\*+[^*/])*\*+"/"
id 			{letter}({letter}|{digit})*
/* Escape sequences: simple escapes, 1-3 octal digits, or \x plus hex digits. */
char		(\\[abfnrvt\'\"\?\\])|(\\[0-7]{1,3})|("\\x"{digit16}+)
achar		\'([^\n\'\\]|{char})\'
string		\"([^\n\\\"]|{char})*\"
number		[1-9]{digit}*{postfix}?
number8		0{digit8}*{postfix}?
number16	("0x"|"0X"){digit16}+{postfix}?
numberSCM	{digit}*\.?{digit}+[eE][+-]?{digit}*{postfixf}?
numberF		{digit}*\.{digit}+{postfixf}?
operator	[\?\~\,\(\)\{\}\[\]\;\:\.]

注意:我使用的是在linux上翻译lex文件,然后在windows上开发。用--nounistd选项可以不使用linux系统头文件.使用-+选项生成c++文件。

c++代码

#include <cstdio>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <unordered_map>

using std::ifstream;
using std::unordered_map;
using std::string;
using std::cout;
// Half the size of the input buffer in bytes; main() allocates 2 * MaxLen.
#define MaxLen 2048

// Each macro below expands to a run of `case` labels so that a character
// class can be dropped straight into a `switch` over the current character.

// Decimal digits.
#define C_DIGIT \
	case '0':case'1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9':
// ASCII letters (the underscore is handled separately by the callers).
#define C_LETTER \
	case 'A':case'B':case 'C':case 'D':case'E':case 'F':case 'G':	\
	case 'H':case'I':case 'J':case 'K':case'L':case 'M':case 'N':	\
	case 'O':case'P':case 'Q':case 'R':case'S':case 'T':				\
	case 'U':case'V':case 'W':case 'X':case'Y':case 'Z':				\
																	\
	case 'a':case'b':case 'c':case 'd':case'e':case 'f':case 'g':	\
	case 'h':case'i':case 'j':case 'k':case'l':case 'm':case 'n':	\
	case 'o':case'p':case 'q':case 'r':case's':case 't':				\
	case 'u':case'v':case 'w':case 'x':case'y':case 'z':
// White space. '\r' is included so that CRLF input terminates a token too.
#define C_BLANK \
	case ' ':case '\t':case '\v':case '\f':case '\n':case '\r':
// Hexadecimal digits.
#define C_HEX_DIGIT \
	case '0':case'1':case '2':case '3':case'4':case '5':case '6':case '7':case'8':case '9':		\
	case 'A':case'B':case 'C':case 'D':case'E':case 'F':	\
	case 'a':case'b':case 'c':case 'd':case'e':case 'f':
// Octal digits.
#define C_OCT_DIGIT \
	case '0':case'1':case '2':case '3':case'4':case '5':case '6':case '7':
// Characters that may start an integer suffix.
#define C_NUMBER_POSTFIX \
	case 'l':case 'L':case 'u':case 'U':
// Operators/punctuators that are always a single character.
#define C_OPERATOR_SINGLE \
	case ',':case';':case '?':case '~':case '(':case ')':	\
	case '[':case']':case '{':case '}':
// Characters that can start a one- or multi-character operator (or a comment).
// '%' belongs here: HandleOperator() already handles "%" and "%=".
#define C_OPERATOR \
	case '+':case'-':case '*':case '/':case '&':case '|':case ':':	\
	case '=':case'^':case '!':case '#':case '>':case '<':case '%':
// Every character that terminates the token currently being scanned.
#define C_SPLITE C_BLANK C_OPERATOR C_OPERATOR_SINGLE

// Keyword table: maps a reserved word to the numeric id printed next to it.
// Existing ids are unchanged (34 was never assigned); the C keywords "short"
// and "while", which were missing, are appended with new ids.
unordered_map<string, int> KeyTable = { 
	{"asm",0},{"auto",1},{"bool",2},{"break",3},{"case",4},{"catch",5},{"char",6},
	{"class",7},{"const",8},{"continue",9},{"default",10},{"delete",11},{"do",12},{"double",13},
	{"else",14},{"enum",15},{"explicit",16},{"export",17},{"extern",18},{"false",19},{"float",20},
	{"for",21},{"friend",22},{"goto",23},{"if",24},{"inline",25},{"int",26},{"long",27},
	{"namespace",28},{"mutable",29},{"new",30},{"operator",31},{"private",32},{"protected",33},{"public",35},
	{"register",36},{"return",37},{"signed",38},{"sizeof",39},{"static",40},{"struct",41},{"switch",42},
	{"template",43},{"this",44},{"throw",45},{"true",46},{"try",47},{"typedef",48},{"void",49},
	{"typeid",50},{"typename",51},{"union",52},{"unsigned",53},{"using",54},{"virtual",55},{"volatile",56},
	{"short",57},{"while",58}
};

// Compound-operator table: maps the operator text to the numeric id printed
// next to it. "|=" replaces the former "\=" entry (an invalid escape sequence),
// and "&&" / "&=", which HandleOperator() produces, are added so that their
// lookup no longer falls back to a default-inserted 0.
unordered_map<string, int> Operator = {
	{"/=",0} ,{"<=",1}, {"<<",2},{"<<=",3} ,{">=",4}, {">>=",5},{">>",6} ,{"%=",7}, {"&&",8}, {"...",9},
	{"+=",10}, {"++",11} ,{"->",12}, {"-=",13}, {"--",14}, {"||",15},{"|=",16},{"::",17},{"*=",18},
	{"==",19},{"!=",20},{"^=",21},{"##",22},{"&=",23}
};

// Token categories produced by the scanner. The numeric value of each
// enumerator is used as an index into CPPPTYPE_NAME and into the per-type
// counters in main(), so the order here must match CPPPTYPE_NAME.
enum CPPTYPE
{
	CPP_NULL,           // error / no valid token
	CPP_NAME,           // identifier
	CPP_NUMBER_DEC,     // decimal integer
	CPP_NUMBER_SCM,     // floating-point number in scientific notation
	CPP_NUMBER_OCT,     // octal integer
	CPP_NUMBER_HEX,     // hexadecimal integer
	CPP_NUMBER_FLOAT,   // floating-point number
	CPP_OPERATOR,       // single-character operator
	CPP_OPERATOR_COM,   // compound (multi-character) operator
	CPP_CHAR,           // character literal
	CPP_STRING,         // string literal
	CPP_NOTES,          // comment
	CPP_KEY             // keyword
};

// Display name of each token category, indexed by the CPPTYPE value
// (main() prints CPPPTYPE_NAME[result.type]); one entry per enumerator.
const char* CPPPTYPE_NAME[13] = { "错误","标识符","十进制数","科学计数法浮点数","八进制数",
		"十六进制数","浮点数","运算符","复合运算符","字符","字符串","注释","关键字" };

// Result returned by the Handle* scanners.
struct TypeResult
{
	CPPTYPE type;            // category of the token that was scanned
	const char * Adding;     // error message; main() prints it when type is CPP_NULL
};

// Forward declarations of the helpers defined after main().
void GotoNextSplite(char *buff, int &start);
void SkipBlank(char * buff, int &start);
// Line counter; SkipBlank() increments it for every '\n' it skips.
int LineNum = 0;

/*
 * Scans one numeric literal that begins at buff[start].
 *
 * buff  - zero-terminated input buffer.
 * start - in: index of the first character of the literal (a digit, or the
 *         '.' of a literal such as ".5"); out: index of the first character
 *         after the literal, or after the skipped text when an error occurred.
 *
 * Returns the literal's category in `type`. On error `type` is CPP_NULL,
 * `Adding` holds the message, and the input has been advanced to the next
 * delimiter with GotoNextSplite().
 *
 * The function is a goto-driven state machine; each label is one state.
 * Changes compared with the previous version:
 *   - the meaningless `case '10'` label is gone;
 *   - the exponent accepts one optional sign followed by decimal digits only
 *     (it used to accept hex digits and a sign before every digit, so
 *     "1e5-3" was swallowed as a single number);
 *   - "0x" must be followed by at least one hex digit;
 *   - the terminating '\0' counts as a delimiter, so a number at the very end
 *     of the input is accepted.
 */
TypeResult HandleNumber(char *buff, int &start) {
	// Suffix letters that are still allowed. Integers may use one u/U and up to
	// two l/L in any order; floating literals may use a single f/F/l/L.
	bool flag_L = false, flag_LL = false, flag_U = false, flag_FL = false;
	TypeResult Result;
	Result.type = CPPTYPE::CPP_NULL;
	Result.Adding = "";
	switch (buff[start])
	{
	case '0':
		if (buff[start + 1] == 'x' || buff[start + 1] == 'X') {
			start += 2;
			goto HEX_FIRST;
		}
		else
			goto OCT;
	default:
		goto DEC;
	}
DEC:
	switch (buff[start])
	{
	C_DIGIT
		start++;
		goto DEC;
	case '\0':
	C_SPLITE
		Result.type = CPPTYPE::CPP_NUMBER_DEC;
		goto FINISH;
	C_NUMBER_POSTFIX
		flag_L = true; flag_LL = true; flag_U = true;
		Result.type = CPPTYPE::CPP_NUMBER_DEC;
		goto POSTFIX;
	case '.':
		start++;
		goto DOUBLE;
	case 'e':case 'E':
		start++;
		goto SCM;
	default:
		Result.Adding = "错误的十进制数";
		goto ERROR;
	}
OCT:
	switch (buff[start])
	{
	C_OCT_DIGIT
		start++;
		goto OCT;
	C_NUMBER_POSTFIX
		flag_L = true; flag_LL = true; flag_U = true;
		Result.type = CPPTYPE::CPP_NUMBER_OCT;
		goto POSTFIX;
	case '\0':
	C_SPLITE
		Result.type = CPPTYPE::CPP_NUMBER_OCT;
		goto FINISH;
	case '.':
		start++;
		goto DOUBLE;
	case '8':case '9':
		// Not octal any more, but still fine if it turns out to be a float ("089.5").
		goto MABY_DOUBLE;
	case 'e':case 'E':
		start++;
		goto SCM;
	default:
		Result.Adding = "不合法的八进制数";
		goto ERROR;
	}
HEX_FIRST:
	// The first character after "0x" must be a hex digit.
	switch (buff[start])
	{
	C_HEX_DIGIT
		start++;
		goto HEX;
	default:
		Result.Adding = "不合法的十六进制数字";
		goto ERROR;
	}
HEX:
	switch (buff[start])
	{
	C_HEX_DIGIT
		start++;
		goto HEX;
	C_NUMBER_POSTFIX
		flag_L = true; flag_LL = true; flag_U = true;
		Result.type = CPPTYPE::CPP_NUMBER_HEX;
		goto POSTFIX;
	case '\0':
	C_SPLITE
		Result.type = CPPTYPE::CPP_NUMBER_HEX;
		goto FINISH;
	default:
		Result.Adding = "不合法的十六进制数字";
		goto ERROR;
	}
MABY_DOUBLE:
	// Leading 0 followed by an 8 or 9: only valid when a '.' or exponent follows.
	switch (buff[start])
	{
	C_DIGIT
		start++;
		goto MABY_DOUBLE;
	case '.':
		start++;
		goto DOUBLE;
	case 'e': case 'E':
		start++;
		goto SCM;
	default:
		Result.Adding = "不合法的八进制数";
		goto ERROR;
	}
DOUBLE:
	// Fractional part, entered after the '.' has been consumed.
	switch (buff[start])
	{
	C_DIGIT
		start++;
		goto DOUBLE;
	case '\0':
	C_SPLITE
		Result.type = CPPTYPE::CPP_NUMBER_FLOAT;
		goto FINISH;
	case 'e': case 'E':
		start++;
		goto SCM;
	case 'l':case 'L':case 'F':case 'f':
		flag_FL = true;
		Result.type = CPPTYPE::CPP_NUMBER_FLOAT;
		goto POSTFIX;
	default:
		Result.Adding = "不合法的浮点数";
		goto ERROR;
	}
SCM:
	// Exponent, entered after the 'e'/'E' has been consumed: one optional sign...
	if (buff[start] == '+' || buff[start] == '-')
		start++;
SCM_DIGITS:
	// ...followed by decimal digits only.
	switch (buff[start])
	{
	C_DIGIT
		start++;
		goto SCM_DIGITS;
	case '\0':
	C_SPLITE
		Result.type = CPPTYPE::CPP_NUMBER_SCM;
		goto FINISH;
	C_NUMBER_POSTFIX
	case 'f':case 'F':
		// u/U also lands here and is then rejected by POSTFIX.
		Result.type = CPPTYPE::CPP_NUMBER_SCM;
		flag_FL = true;
		goto POSTFIX;
	default:
		Result.Adding = "不合法的科学计数法";
		goto ERROR;
	}
POSTFIX:
	// Each suffix letter uses up one of the allowances set by the caller state.
	switch (buff[start])
	{
	case 'l':case 'L':
		if (flag_L)flag_L = false;
		else if (flag_LL) flag_LL = false;
		else if (flag_FL) flag_FL = false;
		else goto POSTFIX_ERROR;
		start++;
		goto POSTFIX;
	case 'U':case 'u':
		if (flag_U) flag_U = false;
		else goto POSTFIX_ERROR;
		start++;
		goto POSTFIX;
	case 'F':case 'f':
		if (flag_FL) flag_FL = false;
		else goto POSTFIX_ERROR;
		start++;
		goto POSTFIX;
	case '\0':
	C_SPLITE
		goto FINISH;
	default:
		goto POSTFIX_ERROR;
	}
POSTFIX_ERROR:
	Result.Adding = "数字后缀错误";
ERROR:
	GotoNextSplite(buff, start);
	Result.type = CPPTYPE::CPP_NULL;
FINISH:
	return Result;
}

/*
 * Scans an operator or a comment that begins at buff[start].
 *
 * buff  - zero-terminated input buffer.
 * start - in: index of the first operator character; out: index of the first
 *         character after the operator/comment.
 *
 * Returns CPP_OPERATOR for a one-character operator, CPP_OPERATOR_COM for a
 * compound operator, CPP_NOTES for a comment, or CPP_NULL (with a message in
 * `Adding`) for a block comment that is not closed before the end of input.
 *
 * Changes compared with the previous version:
 *   - '-' no longer advances `start` while testing for "->";
 *   - '<' now recognises "<<" and "<<=" (it used to test for '>');
 *   - a block comment is searched for its closing "*" "/" only after the
 *     opening two characters, so the three characters "/" "*" "/" are not a
 *     complete comment;
 *   - comments stop at the terminating '\0' instead of running off the buffer;
 *   - a line comment leaves its '\n' for SkipBlank(), and newlines inside a
 *     block comment increment LineNum, so the line count stays correct.
 */
TypeResult HandleOperator(char*buff, int &start)
{
	TypeResult result;
	result.type = CPPTYPE::CPP_NULL;
	result.Adding = "";
	const char next = buff[start + 1];
	switch (buff[start])
	{
	case '*':case '=':case '!':case '^':case '%':
		if (next == '=')
			goto COM;
		goto OP;
	case '#':
		if (next == '#') goto COM;
		goto OP;
	case ':':
		if (next == ':') goto COM;
		goto OP;
	case '+':
		if (next == '+' || next == '=') goto COM;
		goto OP;
	case '&':
		if (next == '&' || next == '=') goto COM;
		goto OP;
	case '|':
		if (next == '|' || next == '=') goto COM;
		goto OP;
	case '-':
		if (next == '-' || next == '>' || next == '=') goto COM;
		goto OP;
	case '>':case '<':
		if (next == '=')
			goto COM;
		if (next == buff[start]) {
			// ">>" or "<<"; with a trailing '=' the operator is three characters
			// long, so skip one extra character before COM adds the usual two.
			if (buff[start + 2] == '=')
				start++;
			goto COM;
		}
		goto OP;
	case '/':
		if (next == '/') goto NOTES;
		if (next == '*') goto NOTES_BLOCK;
		if (next == '=') goto COM;
		goto OP;
	default:
		// Not an operator start; consume a single character so the caller advances.
		goto OP;
	}
COM:
	start += 2;
	result.type = CPPTYPE::CPP_OPERATOR_COM;
	return result;
OP:
	start++;
	result.type = CPPTYPE::CPP_OPERATOR;
	return result;
NOTES:
	// Line comment: runs up to, but not including, the newline.
	start += 2;
	while (buff[start] != '\n' && buff[start] != '\0')
		start++;
	result.type = CPPTYPE::CPP_NOTES;
	return result;
NOTES_BLOCK:
	// Block comment: skip the opening two characters, then look for the closer.
	start += 2;
	for (;;) {
		switch (buff[start])
		{
		case '\0':
			result.Adding = "块注释没有结束";
			result.type = CPPTYPE::CPP_NULL;
			return result;
		case '\n':
			LineNum++;
			break;
		case '*':
			if (buff[start + 1] == '/') {
				start += 2;
				result.type = CPPTYPE::CPP_NOTES;
				return result;
			}
			break;
		default:
			break;
		}
		start++;
	}
}

// True when c is an octal digit.
static bool IsOctDigit(char c)
{
	return c >= '0' && c <= '7';
}

// True when c is a hexadecimal digit.
static bool IsHexDigit(char c)
{
	return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
}

/*
 * Scans the remainder of a character or string literal.
 *
 * buff  - zero-terminated input buffer.
 * start - in: index of the first character after the opening quote;
 *         out: index of the first character after the literal (or after the
 *         skipped text when an error occurred).
 * flag  - true for a string literal ("..."), false for a character literal.
 *
 * Returns CPP_STRING / CPP_CHAR, or CPP_NULL with a message in `Adding`.
 *
 * Changes compared with the previous version:
 *   - octal escapes may start with any octal digit and have 1 to 3 digits
 *     (only escapes starting with '0' were accepted);
 *   - hexadecimal escapes need at least one hex digit after "\x";
 *   - an empty character literal ('') is rejected;
 *   - an unterminated literal stops at the newline or at the terminating '\0'
 *     instead of swallowing the start of the next line or overrunning the
 *     buffer.
 */
TypeResult HandleString(char *buff, int &start,bool flag)
{
	TypeResult result;
	result.type = CPPTYPE::CPP_NULL;
	result.Adding = "";
	if (flag)
		goto ISSTRING;
	// Character literal: exactly one character or one escape sequence.
	switch (buff[start])
	{
	case '\\':
		start++;
		goto TRANSFERR;
	case '\'':
		// Empty literal: skip the stray closing quote and whatever follows it.
		GotoNextSplite(buff, start);
		result.Adding = "错误的字符";
		return result;
	case '\n':
	case '\0':
		// The newline / end of input is left in place for the caller.
		result.Adding = "错误的字符结尾";
		return result;
	default:
		start++;
	}
ISCHAR__:
	// The closing quote must follow immediately.
	if (buff[start] == '\'') {
		result.type = CPPTYPE::CPP_CHAR;
		start++;
		return result;
	}
	GotoNextSplite(buff, start);
	result.Adding = "错误的字符";
	result.type = CPPTYPE::CPP_NULL;
	return result;
ISSTRING:
	switch (buff[start])
	{
	case '\\':
		start++;
		goto TRANSFERR;
	case '\n':
	case '\0':
		// Unterminated string: stop here so the next line is scanned normally.
		result.Adding = "错误的字符结尾";
		result.type = CPPTYPE::CPP_NULL;
		return result;
	case '"':
		start++;
		result.type = CPPTYPE::CPP_STRING;
		return result;
	default:
		start++;
		goto ISSTRING;
	}
ADAPT:
	// An escape sequence has been consumed; resume in the state we came from.
	if (flag)
		goto ISSTRING;
	else
		goto ISCHAR__;
TRANSFERR:
	// buff[start] is the character right after the backslash.
	switch (buff[start])
	{
	case '\\':case '\'':case '\"':case '\?':
	case 'a':case 'b':case 'f':case 'n':case 'r':case 't':case 'v':
		start++;
		goto ADAPT;
	C_OCT_DIGIT
		// One to three octal digits.
		start++;
		if (IsOctDigit(buff[start])) {
			start++;
			if (IsOctDigit(buff[start]))
				start++;
		}
		goto ADAPT;
	case 'x':
		// "\x" followed by one or more hex digits.
		if (!IsHexDigit(buff[start + 1]))
			goto ERROR;
		start += 2;
		while (IsHexDigit(buff[start]))
			start++;
		goto ADAPT;
	default:
		goto ERROR;
	}
ERROR:
	GotoNextSplite(buff, start);
	result.Adding = "错误的转义";
	result.type = CPPTYPE::CPP_NULL;
	return result;
}

// Advances `start` past the identifier characters (letters, digits, '_')
// that begin at buff[start]; stops at the first character of any other kind.
void inline HandleNames(char *buff, int &start) {
	for (bool in_name = true; in_name; ) {
		switch (buff[start])
		{
		C_LETTER
		C_DIGIT
		case '_':
			++start;
			break;
		default:
			in_name = false;
			break;
		}
	}
}

// Returns a copy of the characters buff[start] .. buff[end - 1].
string my_print(char *buff, int start, int end) {
	const char *first = buff + start;
	return string(first, end - start);
}

int main()
{
	int CPPNAMECount = 0;
	unordered_map<string, int> Table;
	int count[13] = { 0 };
	int AllLetter = 0;
	char *buff = new char[MaxLen*2];
	memset(buff, 0, sizeof(char)*MaxLen * 2);
	int index = 0;
	int pre = 0;
	ifstream fs("词法分析.cpp");
	fs.read(buff, MaxLen*2);
	TypeResult result;
start:
	pre = index;
	switch (buff[index])
	{
	case '.':
		switch (buff[index+1])
		{
		case '.':
			if (buff[index + 2] != '.')
			{
				result.type = CPPTYPE::CPP_NULL;
				result.Adding = "不合法的运算符";
				GotoNextSplite(buff, index);
				goto OUT;
			}
			else {
				index += 3;
				result.type = CPPTYPE::CPP_OPERATOR_COM;
				goto OUT;
			}
		C_DIGIT
			goto NUM_HANDLE;
		default:
			index++;
			result.type = CPPTYPE::CPP_OPERATOR;
			goto OUT;
		}
NUM_HANDLE:
	C_DIGIT
		result = HandleNumber(buff, index);
		goto OUT;
	C_OPERATOR_SINGLE case '\\':
		index++;
		result.type = CPPTYPE::CPP_OPERATOR;
		goto OUT;
	C_OPERATOR
		result = HandleOperator(buff, index);
		goto OUT;
	C_LETTER case '_':
		HandleNames(buff, index);
		result.type = CPPTYPE::CPP_NAME;
		goto OUT;
	case '\'':
		index++;
		result = HandleString(buff, index, false);
		goto OUT;
	case '"':
		index++;
		result = HandleString(buff, index, true);
		goto OUT;
	default:
		AllLetter += index;
		for (int i = 0; i < 13; i++)
			cout << CPPPTYPE_NAME[i] << ':' << count[i] << std::endl;
		cout << "行数:" << LineNum << std::endl;
		cout << "总字数:" << AllLetter << std::endl;
		return 0;
	}
OUT:
	string a = my_print(buff, pre, index);
	if (CPPTYPE::CPP_NOTES != result.type)
		cout << a;
	if (result.type == CPPTYPE::CPP_NULL) {
		printf("\t\t%s\n", result.Adding);
		SkipBlank(buff, index);
		goto start;
	}
	if (result.type == CPPTYPE::CPP_NAME)
		if (KeyTable.count(a))
			result.type = CPPTYPE::CPP_KEY;
	if (result.type == CPPTYPE::CPP_NAME && !Table.count(a))
		Table.insert(std::pair<string,int>(a, CPPNAMECount++));
	if (result.type != CPPTYPE::CPP_NOTES)
		printf("\t \t%s\t", CPPPTYPE_NAME[result.type]);
	count[result.type]++;
	switch (result.type)
	{
	case CPPTYPE::CPP_NAME:
		cout << Table[a] << std::endl; break;
	case CPPTYPE::CPP_KEY:
		cout << KeyTable[a] << std::endl; break;
	case CPPTYPE::CPP_OPERATOR_COM:
		cout << Operator[a] << std::endl; break;
	default:
	if (result.type != CPPTYPE::CPP_NOTES)
		cout << a << std::endl; break;
	}
	SkipBlank(buff, index);
	if (index >= MaxLen) {
		index -= MaxLen;
		AllLetter += MaxLen;
		memcpy(buff, buff + MaxLen, MaxLen);
		memset(buff + MaxLen, 0, MaxLen);
		fs.read(buff + MaxLen, MaxLen);
	}
	goto start;
	return 0;
}

// Error recovery: advances `start` to the next delimiter (white space or an
// operator character) without consuming it. The terminating '\0' also stops
// the search, so the scan can no longer run past the end of the buffer.
void GotoNextSplite(char *buff, int &start)
{
	for (;;) {
		switch (buff[start])
		{
		case '\0':
		C_BLANK
		C_OPERATOR
		C_OPERATOR_SINGLE
			return;
		default:
			start++;
			break;
		}
	}
}

// Advances `start` past white space, incrementing LineNum for every '\n'.
// '\r' is skipped as well so that CRLF line endings do not stop the scanner.
void SkipBlank(char* buff, int &start) {
	for (;;) {
		switch (buff[start])
		{
		case '\n':
			LineNum++;
			start++;
			break;
		case ' ':case '\t':case '\v':case '\f':case '\r':
			start++;
			break;
		default:
			return;
		}
	}
}

lex辅助代码

/* Character classes shared by the token definitions below. */
digit 		[0-9]
digit8		[0-7]
digit16		[0-9a-fA-F]
/* Integer suffix: at most one u/U and up to two l/L, in any of the three orders. */
postfix		((u|U)?(l|L)?(l|L)?)|((l|L)?(u|U)?(l|L)?)|((l|L)?(l|L)?(u|U)?)
/* Floating suffix. */
postfixf	(l|L)|(f|F)
letter 		[A-Za-z_]
/* Line comment: everything up to, but not including, the newline. */
note 		"//"[^\n]*
/* Block comment. The body is any run of non-stars, or stars followed by a
   character that is neither '*' nor '/'; the terminator is one or more stars
   and a slash, so comments ending in "**" followed by "/" are matched too. */
notes 		"/*"([^*]|\*+[^*/])*\*+"/"
id 		{letter}({letter}|{digit})*
/* Escape sequences: simple escapes, 1-3 octal digits, or \x plus hex digits. */
char		(\\[abfnrvt\'\"\?\\])|(\\[0-7]{1,3})|("\\x"{digit16}+)
achar		\'([^\n\'\\]|{char})\'
string		\"([^\n\\\"]|{char})*\"
number		[1-9]{digit}*{postfix}?
number8		0{digit8}*{postfix}?
number16	("0x"|"0X"){digit16}+{postfix}?
numberSCM	{digit}*\.?{digit}+[eE][+-]?{digit}*{postfixf}?
numberF		{digit}*\.{digit}+{postfixf}?
operator	[\?\~\,\(\)\{\}\[\]\;\:\.]
%%
	/* Token codes returned to the driver:
	 *   1..11   index into name[] (block comment, identifier, literals, punctuation)
	 *   12..42  operators
	 *   43      newline, 44 white space, 45 unrecognised character
	 *   46      line comment
	 *   47..52  operators that previously had no rule
	 * Code 0 is never returned by a rule: yylex() returns 0 at end of input, so
	 * a rule returning 0 would make the driver stop at the first line comment.
	 */
"*="		{return 12;}
"*"		{return 13;}
"=="		{return 14;}
"="		{return 15;}
"!="		{return 16;}
"!"		{return 17;}
"^="		{return 18;}
"^"		{return 19;}
"##"		{return 20;}
"#"		{return 21;}
"++"		{return 22;}
"+="		{return 23;}
"+"		{return 24;}
"--"		{return 25;}
"-="		{return 26;}
"->"		{return 27;}
"-"		{return 28;}
"||"		{return 29;}
"|="		{return 30;}
"|"		{return 31;}
"&&"		{return 32;}
"&="		{return 33;}
"&"		{return 34;}
"<<="		{return 35;}
"<<"		{return 36;}
"<="		{return 37;}
"<"		{return 38;}
">>="		{return 39;}
">>"		{return 40;}
">="		{return 41;}
">"		{return 42;}
"/="		{return 47;}
"/"		{return 48;}
"%="		{return 49;}
"%"		{return 50;}
"::"		{return 51;}
"..."		{return 52;}
{note} 		{return 46;}
{notes}		{return 1;}
{id} 		{return 2;}
{achar}		{return 4;}
{string}	{return 5;}
{number}	{return 6;}
{number8}	{return 7;}
{number16}	{return 8;}
{numberSCM}	{return 9;}
{numberF}	{return 10;}
{operator}	{return 11;}
"\n"		{return 43;}
[ \t\v\f\r]+	{return 44;}
.		{return 45;}
%%
int yyFlexLexer::yywrap()
{
	return 1;
}
#include <fstream>
#include <iostream>
// Usually found at /usr/include/FlexLexer.h on Linux; when developing on
// Windows, copy that header next to the sources.
#include <FlexLexer.h>
#include <string>
#include <unordered_map>
using std::ifstream;
using std::cout;
using std::endl;
using std::string;
using std::unordered_map;

extern unordered_map<string, int> KeyTable;

// Keyword table used by the flex driver: reserved word -> id. Existing ids are
// unchanged; the missing C keywords "short" and "while" are appended.
unordered_map<string, int> KeyTable2 = {
	{"asm",0},{"auto",1},{"bool",2},{"break",3},{"case",4},{"catch",5},{"char",6},
	{"class",7},{"const",8},{"continue",9},{"default",10},{"delete",11},{"do",12},{"double",13},
	{"else",14},{"enum",15},{"explicit",16},{"export",17},{"extern",18},{"false",19},{"float",20},
	{"for",21},{"friend",22},{"goto",23},{"if",24},{"inline",25},{"int",26},{"long",27},
	{"namespace",28},{"mutable",29},{"new",30},{"operator",31},{"private",32},{"protected",33},{"public",35},
	{"register",36},{"return",37},{"signed",38},{"sizeof",39},{"static",40},{"struct",41},{"switch",42},
	{"template",43},{"this",44},{"throw",45},{"true",46},{"try",47},{"typedef",48},{"void",49},
	{"typeid",50},{"typename",51},{"union",52},{"unsigned",53},{"using",54},{"virtual",55},{"volatile",56},
	{"short",57},{"while",58}
};

// Display names of the token categories; the index is also the index into
// typeCount[] in maina(). Index 12 counts the operators with their own rule.
const char* name[] = {
	"行注释","块注释","标识符","关键字","字符","字符串","十进制数字","八进制数字","十六进制数字","科学计数法数字","浮点数","运算符","操作符"
};

/*
 * Driver of the flex-generated scanner: scans "词法分析.cpp", prints every
 * token with its category, and prints the per-category counts, the line count
 * and the character count at the end. Returns 1 when the file cannot be
 * opened, 0 otherwise.
 *
 * Keywords are looked up in KeyTable2, which is defined in this listing.
 */
int maina()
{
	yyFlexLexer Scaner;
	ifstream ifile("词法分析.cpp");
	if (!ifile) {
		cout << "无法打开文件: 词法分析.cpp" << endl;
		return 1;
	}
	Scaner.switch_streams(ifile,std::cout);
	unordered_map<string, int> table;   // identifier -> id
	int c;
	int typeCount[13] = { 0 };
	int IDcount = 0;
	int line = 0;
	int allLetter = 0;
	// yylex() returns 0 only at end of input.
	while ((c = Scaner.yylex()) != 0)
	{
		const string text(Scaner.YYText(), Scaner.YYLeng());
		switch (c)
		{
		case 2:
			if (KeyTable2.count(text)) {
				cout << text << "\t\t关键字\t\t" << KeyTable2[text] << endl;
				typeCount[3]++;
			}
			else {
				if (!table.count(text))
					table.insert(std::pair<string, int>(text, IDcount++));
				cout << text << "\t\t标识符\t\t" << table[text] << endl;
				typeCount[2]++;
			}
			break;
		case 46:
			// Line comment (reported under name[0]).
			cout << text << "\t\t" << name[0] << "\t\t" << text << endl;
			typeCount[0]++;
			break;
		case 1:
			// A block comment may span several lines; keep the line count right.
			for (char ch : text)
				if (ch == '\n')
					line++;
			cout << text << "\t\t" << name[c] << "\t\t" << text << endl;
			typeCount[c]++;
			break;
		case 4:case 5:case 6:case 7:case 8:case 9:case 10:case 11:
			cout << text << "\t\t" << name[c] << "\t\t" << text << endl;
			typeCount[c]++;
			break;
		case 45:
			// `line` counts the newlines seen so far, so the current line is line + 1.
			cout << "\t\t在" << line + 1 << "行 ->" << text << "<- 附近有错误" << endl;
			break;
		case 43:
			line++;
			break;
		case 44:
			break;
		default:
			// Every remaining code is an operator with its own rule.
			cout << text << "\t\t运算符\t\t" << c << endl;
			typeCount[12]++;
			break;
		}
		allLetter += Scaner.YYLeng();
	}
	for (int i = 2; i < 13; i++)
		cout << name[i] << ":" << typeCount[i] << std::endl;
	cout << "总行数:" << line << endl;
	cout << "总字数:" << allLetter << endl;
	return 0;
}

你可能感兴趣的:(C语言词法分析程序的设计与实现)