随想录(c编译器的实现)

【 声明:版权所有,欢迎转载,请勿用于商业用途。 联系信箱:feixiaoxing @163.com】


    无意中在github上发现一个很有意思的项目,比如这里https://github.com/hukun01/CMMInterpreter。它利用flex实现了字符串的识别,利用bison实现了ast语法树的构建,最后直接利用ast进行计算和识别。ast节点遍历的时候,作者应该是根左右遍历的,其实左右根遍历或许更好一些。注意,语法解析的时候肯定是树的结构,但是不一定是二叉树。


    比如,它的lex文件是这么写的,

/***********************************/
/* File: cmm.l    version 2.0      */
/* Flex version for CMM            */
/* CMM Interpreter Construction    */
/***********************************/
%option noyywrap
%{
#include "globals.h"
#include "util.h"
#include "scan.h"
#include "cmm.tab.h"
%}

digit		[0-9]
int_num		{digit}+
real_num	{digit}+"."{digit}*
letter		[a-zA-Z]
identifier	{letter}+({letter}|{digit}|_)*({letter}+|{digit}+)|{letter}+
whitespace	[ \t]*

%%

"if"		{ return IF;}
"else"		{ return ELSE;}
"while"		{ return WHILE;}
"int"		{ yylval.dataType = Int; return INT;} 
"real"		{ yylval.dataType = Real; return REAL;}
"bool"		{ yylval.dataType = Bool; return BOOL;}
"read"		{ return READ;}
"write"		{ return WRITE;}
"main"		{ return MAIN;}

{int_num}		{ yylval.intval = atoi(yytext); return INT_VALUE;}
{real_num}		{ yylval.realval = atof(yytext); return REAL_VALUE;}
{identifier}	{ yylval.idName = copyString(yytext); return ID;}
{whitespace}	{/*Do nothing.*/}

"+"		{ return PLUS;}
"-"		{ return SUB;}
"*"		{ return MUL;}
"/"		{ return DIV;}
"<"		{ yylval.binaryOperator = 1; return REL_OP;}
">"		{ yylval.binaryOperator = 2; return REL_OP;}
"<="	{ yylval.binaryOperator = 3; return REL_OP;}
">="	{ yylval.binaryOperator = 4; return REL_OP;}
"=="	{ yylval.binaryOperator = 5; return REL_OP;}
"<>"	{ yylval.binaryOperator = 6; return REL_OP;}
"&&"	{ yylval.binaryOperator = 7; return REL_OP;}
"||"	{ yylval.binaryOperator = 8; return REL_OP;}

"="		{ return ASSIGN;}
"("		{ return LPAREN;}
")"		{ return RPAREN;}
";"		{ return SEMI;}
"{"		{ return LBRACE;}
"}"		{ return RBRACE;}
"["		{ return LBRACKET;}
"]"		{ return RBRACKET;}
","		{ return COMMA;}

"//"	{ char c = input();
		  while(c != '\n')
		  {	if (c == EOF) break;
		  	c = input();
		  }
		  lineno++;
		}
"/*"	{	char c;
			int flag = 1;
			do
			{	c = input();
				entry1:
				if (c == EOF) break;
				if (c == '\n') lineno++;
				if (c == '*')
				{	c = input();
					if (c == '/')
						flag = 0;
					else
						goto entry1;
				}
			} while (flag);
		}
\n		{lineno++;}
.		{yyerror("Mystery character %s\n", yytext);return ERROR;}

%%
/* 用于语法分析时初始化词法分析接口 */
void iniLexer(void)
{
	static int firstTime = TRUE;
	lineno = 0;
	if (firstTime)
	{	firstTime = FALSE;
		lineno++;
		yyin = source;
		yyout = listing;
	}
}
/* 词法分析器专用 
TokenType getToken(void)
{	static int firstTime = TRUE;
	TokenType currentToken;
	if (firstTime)
	{	firstTime = FALSE;
		lineno++;
		yyin = source;
		yyout = listing;
	}
	currentToken = yylex();
	strncpy(tokenString,yytext,MAXTOKENLEN);
	if (TraceScan) {
		fprintf(listing,"\t%d: ",lineno);
		printToken(currentToken);
	}
	return currentToken;
}*/
    它的yacc文件是这么写的,

/***********************************/
/* File: cmm.y                     */
/* Bison grammar file about CMM    */
/* CMM Interpreter Construction    */
/***********************************/

/*预计有1个移进/归约冲突*/
%expect 1
%{

#include "globals.h"
#include "util.h"
#include "scan.h"
#include "parse.h"

static TreeNode * savedTree; /* stores syntax tree for later return */

%}

%union{
	struct treeNode * node;
	int intval;
	double realval;
	char * idName;
	int binaryOperator;
	int dataType;
}
%token  INT_VALUE
%token  REAL_VALUE
%token  ID
%token  INT REAL BOOL

/* 优先级声明 */
%right ASSIGN
%left PLUS SUB
%left MUL DIV
%nonassoc  REL_OP
%nonassoc UMINUS
/* 声明文法中用到的tokens */
%token IF ELSE WHILE READ WRITE MAIN
%token LPAREN RPAREN SEMI LBRACE RBRACE LBRACKET RBRACKET COMMA
%token ASSIGN
%token NEWLINE ERROR

%type  stmt_list stmt
%type  if_stmt decl_stmt compound_stmt while_stmt assign_stmt read_stmt write_stmt
%type  exp factor bin_exp
%type  type_spec

%start program

%% /* CMM文法 */

program		: stmt_list
				{ savedTree = $1;}
			;
stmt_list	:	{ $$ = NULL; }
			| stmt_list stmt 
				{	TreeNode * t = $1;
					if (t != NULL)
					{	while (t->sibling != NULL){ t = t->sibling;}
						t->sibling = $2;
						$$ = $1;
					}
					else $$ = $2;
				}
			;
stmt		: if_stmt { $$ = $1; }
			| decl_stmt SEMI { $$ = $1; }
			| compound_stmt { $$ = $1; }
			| while_stmt { $$ = $1; }
			| assign_stmt SEMI { $$ = $1; }
			| read_stmt SEMI { $$ = $1; }
			| write_stmt SEMI { $$ = $1; }
			| error  { $$ = NULL; }
			;
compound_stmt
			: LBRACE stmt_list RBRACE
				{	$$ = newStmtNode(CompoundK);
					$$->child[0] = $2;
					$$->lineno = lineno;
				}
			;
decl_stmt	: type_spec ID 
				{	$$ = newStmtNode(DeclK);
					$$->attr.name = $2;
					/* 数组长度为0代表非数组 */
					$$->arrayLength = 0;
					$$->type = $1;
					$$->lineno = lineno;
				}
			| type_spec ID LBRACKET INT_VALUE RBRACKET
				{	$$ = newStmtNode(DeclK);
					$$->attr.name = $2;
					$$->arrayLength = $4;
					$$->type = $1;
					if($$->type == Int) {
						int int_array_temp[$4];
						$$->array.intArray = int_array_temp;
					}
					else if($$->type == Real) {
						double real_array_temp[$4];
						$$->array.realArray = real_array_temp;
					}
					$$->lineno = lineno;
				}
			;
type_spec	: INT 	{ $$ = $1; }
			| REAL 	{ $$ = $1; }
			| BOOL	{ $$ = $1; }
			;
if_stmt     : IF LPAREN bin_exp RPAREN stmt
				{	$$ = newStmtNode(IfK);
					$$->child[0] = $3;
					$$->child[1] = $5;
					$$->child[2] = NULL;
					$$->lineno = lineno;
				}
			| IF LPAREN bin_exp RPAREN stmt ELSE stmt
				{	$$ = newStmtNode(IfK);
					$$->child[0] = $3;
					$$->child[1] = $5;
					$$->child[2] = $7;
					$$->lineno = lineno;
				}
			;
while_stmt	: WHILE LPAREN bin_exp RPAREN stmt
				{	$$ = newStmtNode(WhileK);
					$$->child[0] = $3;
					$$->child[1] = $5;
					$$->lineno = lineno;
				}
			;
assign_stmt : ID ASSIGN exp
				{	$$ = newStmtNode(AssignK);
					$$->attr.name = $1;
					$$->child[0] = $3;
					$$->child[1] = NULL;
					$$->lineno = lineno;
				}
			| ID LBRACKET exp RBRACKET ASSIGN exp
				{	$$ = newStmtNode(AssignK);
					$$->attr.name = $1;
					/* child[1]不为NULL 表示引用的变量是一个数组元素 */
					/* child[0] 是数组元素的下标 */
					$$->child[0] = $3;
					$$->child[1] = $6;
					$$->lineno = lineno;
				}
			;
read_stmt   : READ LPAREN ID RPAREN
				{	$$ = newStmtNode(ReadK);
					$$->attr.name = $3;
					$$->child[0] = NULL;
					$$->lineno = lineno;
				}
			| READ LPAREN ID LBRACKET exp RBRACKET RPAREN
				{	$$ = newStmtNode(ReadK);
					$$->attr.name = $3;
					$$->child[0] = $5;
					$$->lineno = lineno;
				}
			;
write_stmt  : WRITE LPAREN exp RPAREN
				{	$$ = newStmtNode(WriteK);
					$$->child[0] = $3;
					$$->lineno = lineno;
				}
			;
exp			: factor
				{	$$ = $1; }
			| bin_exp
				{	$$ = $1; }
			;
factor		: INT_VALUE
				{	$$ = newExpNode(IntValueK);
					$$->value.int_val = $1;
					$$->type = Int;
					$$->lineno = lineno;
				}
			| REAL_VALUE
				{	$$ = newExpNode(RealValueK);
					$$->value.real_val = $1;
					$$->type = Real;
					$$->lineno = lineno;
				}
			| LPAREN exp RPAREN
				{	$$ = $2; }
			| ID
				{	$$ = newExpNode(IdK);
					$$->attr.name = $1;
					/* child[0]为NULL代表该变量不是数组,用作解释时 */
					$$->child[0] = NULL;
					$$->lineno = lineno;
				}
			| ID LBRACKET exp RBRACKET
				{	$$ = newExpNode(IdK);
					$$->attr.name = $1;
					$$->child[0] = $3;
					$$->lineno = lineno;
				}
			| error { $$ = NULL; }
			;
bin_exp     : /* 关系运算符 */
			  exp REL_OP exp
				{	$$ = newExpNode(OpK);
					$$->child[0] = $1;
					$$->child[1] = $3;
					$$->attr.op = $2;
					$$->lineno = lineno;
				}
			/* 算数运算符 */
			| exp PLUS exp
				{	$$ = newExpNode(OpK);
					$$->child[0] = $1;
					$$->child[1] = $3;
					$$->attr.op = PLUS;
					$$->lineno = lineno;
				}
			| exp SUB exp
				{	$$ = newExpNode(OpK);
					$$->child[0] = $1;
					$$->child[1] = $3;
					$$->attr.op = SUB;
					$$->lineno = lineno;
				}
			| exp MUL exp
				{	$$ = newExpNode(OpK);
					$$->child[0] = $1;
					$$->child[1] = $3;
					$$->attr.op = MUL;
					$$->lineno = lineno;
				}
			| exp DIV exp
				{	$$ = newExpNode(OpK);
					$$->child[0] = $1;
					$$->child[1] = $3;
					$$->attr.op = DIV;
					$$->lineno = lineno;
				}
			| SUB exp %prec UMINUS
				{	$$ = newExpNode(OpK);
					$$->child[0] = $2;
					$$->child[1] = NULL;
					$$->attr.op = UMINUS;
					$$->lineno = lineno;
				}
			;

%%

int yyerror(char * message)
{	fprintf(listing,"Syntax error at line %d: %s\n",lineno,message);
	fprintf(listing,"Current token: %s",tokenString);
	printToken(yychar);
	Error = TRUE;
	return 0;
}
/* 与主函数交互的语法分析函数 */
TreeNode * parse(void)
{	iniLexer();
	yyparse();
	return savedTree;
}
    编译也非常简单,

#The makefile for interpreter based on Flex and Bison
CC = gcc
FLAG = -w
interpreter: main.o util.o cmm.tab.o cmm.lex.o symtab.o analyze.o
	$(CC) $(FLAG) -o ../bin\&test/interpreter main.o util.o cmm.tab.o cmm.lex.o symtab.o analyze.o
main.o: main.c parse.h analyze.h
	$(CC) $(FLAG) -c main.c
cmm.tab.o:cmm.tab.c parse.h
	$(CC) $(FLAG) -c cmm.tab.c
cmm.tab.c:cmm.y
	bison -d cmm.y
cmm.lex.o:cmm.lex.c cmm.tab.h
	$(CC) $(FLAG) -c cmm.lex.c
cmm.lex.c:cmm.l
	flex -o cmm.lex.c cmm.l
symtab.o:symtab.c symtab.h globals.h
	$(CC) $(FLAG) -c symtab.c
analyze.o:analyze.c globals.h symtab.h analyze.h
	$(CC) $(FLAG) -c analyze.c
util.o:util.c
	$(CC) $(FLAG) -c util.c

clean:
	-rm *.o
    最后测试的时候,就是直接利用./interpreter [file_name]就可以了。

    有兴趣的同学可以把这个项目下载下来,多看看,了解一下如果利用flex&bison快速实现一个脚本语言。


你可能感兴趣的:(随想录)