【 声明:版权所有,欢迎转载,请勿用于商业用途。 联系信箱:feixiaoxing @163.com】
无意中在github上发现一个很有意思的项目,比如这里https://github.com/hukun01/CMMInterpreter。它利用flex实现了字符串的识别,利用bison实现了ast语法树的构建,最后直接利用ast进行计算和识别。ast节点遍历的时候,作者应该是根左右遍历的,其实左右根遍历或许更好一些。注意,语法解析的时候肯定是树的结构,但是不一定是二叉树。
比如,它的lex文件是这么写的,
/***********************************/
/* File: cmm.l version 2.0 */
/* Flex version for CMM */
/* CMM Interpreter Construction */
/***********************************/
%option noyywrap
%{
#include "globals.h"
#include "util.h"
#include "scan.h"
#include "cmm.tab.h"
%}
digit [0-9]
int_num {digit}+
real_num {digit}+"."{digit}*
letter [a-zA-Z]
identifier {letter}+({letter}|{digit}|_)*({letter}+|{digit}+)|{letter}+
whitespace [ \t]*
%%
"if" { return IF;}
"else" { return ELSE;}
"while" { return WHILE;}
"int" { yylval.dataType = Int; return INT;}
"real" { yylval.dataType = Real; return REAL;}
"bool" { yylval.dataType = Bool; return BOOL;}
"read" { return READ;}
"write" { return WRITE;}
"main" { return MAIN;}
{int_num} { yylval.intval = atoi(yytext); return INT_VALUE;}
{real_num} { yylval.realval = atof(yytext); return REAL_VALUE;}
{identifier} { yylval.idName = copyString(yytext); return ID;}
{whitespace} {/*Do nothing.*/}
"+" { return PLUS;}
"-" { return SUB;}
"*" { return MUL;}
"/" { return DIV;}
"<" { yylval.binaryOperator = 1; return REL_OP;}
">" { yylval.binaryOperator = 2; return REL_OP;}
"<=" { yylval.binaryOperator = 3; return REL_OP;}
">=" { yylval.binaryOperator = 4; return REL_OP;}
"==" { yylval.binaryOperator = 5; return REL_OP;}
"<>" { yylval.binaryOperator = 6; return REL_OP;}
"&&" { yylval.binaryOperator = 7; return REL_OP;}
"||" { yylval.binaryOperator = 8; return REL_OP;}
"=" { return ASSIGN;}
"(" { return LPAREN;}
")" { return RPAREN;}
";" { return SEMI;}
"{" { return LBRACE;}
"}" { return RBRACE;}
"[" { return LBRACKET;}
"]" { return RBRACKET;}
"," { return COMMA;}
"//" { char c = input();
while(c != '\n')
{ if (c == EOF) break;
c = input();
}
lineno++;
}
"/*" { char c;
int flag = 1;
do
{ c = input();
entry1:
if (c == EOF) break;
if (c == '\n') lineno++;
if (c == '*')
{ c = input();
if (c == '/')
flag = 0;
else
goto entry1;
}
} while (flag);
}
\n {lineno++;}
. {yyerror("Mystery character %s\n", yytext);return ERROR;}
%%
/* 用于语法分析时初始化词法分析接口 */
void iniLexer(void)
{
static int firstTime = TRUE;
lineno = 0;
if (firstTime)
{ firstTime = FALSE;
lineno++;
yyin = source;
yyout = listing;
}
}
/* 词法分析器专用
TokenType getToken(void)
{ static int firstTime = TRUE;
TokenType currentToken;
if (firstTime)
{ firstTime = FALSE;
lineno++;
yyin = source;
yyout = listing;
}
currentToken = yylex();
strncpy(tokenString,yytext,MAXTOKENLEN);
if (TraceScan) {
fprintf(listing,"\t%d: ",lineno);
printToken(currentToken);
}
return currentToken;
}*/
它的yacc文件是这么写的,
/***********************************/
/* File: cmm.y */
/* Bison grammar file about CMM */
/* CMM Interpreter Construction */
/***********************************/
/*预计有1个移进/归约冲突*/
%expect 1
%{
#include "globals.h"
#include "util.h"
#include "scan.h"
#include "parse.h"
static TreeNode * savedTree; /* stores syntax tree for later return */
%}
%union{
struct treeNode * node;
int intval;
double realval;
char * idName;
int binaryOperator;
int dataType;
}
%token INT_VALUE
%token REAL_VALUE
%token ID
%token INT REAL BOOL
/* 优先级声明 */
%right ASSIGN
%left PLUS SUB
%left MUL DIV
%nonassoc REL_OP
%nonassoc UMINUS
/* 声明文法中用到的tokens */
%token IF ELSE WHILE READ WRITE MAIN
%token LPAREN RPAREN SEMI LBRACE RBRACE LBRACKET RBRACKET COMMA
%token ASSIGN
%token NEWLINE ERROR
%type stmt_list stmt
%type if_stmt decl_stmt compound_stmt while_stmt assign_stmt read_stmt write_stmt
%type exp factor bin_exp
%type type_spec
%start program
%% /* CMM文法 */
program : stmt_list
{ savedTree = $1;}
;
stmt_list : { $$ = NULL; }
| stmt_list stmt
{ TreeNode * t = $1;
if (t != NULL)
{ while (t->sibling != NULL){ t = t->sibling;}
t->sibling = $2;
$$ = $1;
}
else $$ = $2;
}
;
stmt : if_stmt { $$ = $1; }
| decl_stmt SEMI { $$ = $1; }
| compound_stmt { $$ = $1; }
| while_stmt { $$ = $1; }
| assign_stmt SEMI { $$ = $1; }
| read_stmt SEMI { $$ = $1; }
| write_stmt SEMI { $$ = $1; }
| error { $$ = NULL; }
;
compound_stmt
: LBRACE stmt_list RBRACE
{ $$ = newStmtNode(CompoundK);
$$->child[0] = $2;
$$->lineno = lineno;
}
;
decl_stmt : type_spec ID
{ $$ = newStmtNode(DeclK);
$$->attr.name = $2;
/* 数组长度为0代表非数组 */
$$->arrayLength = 0;
$$->type = $1;
$$->lineno = lineno;
}
| type_spec ID LBRACKET INT_VALUE RBRACKET
{ $$ = newStmtNode(DeclK);
$$->attr.name = $2;
$$->arrayLength = $4;
$$->type = $1;
if($$->type == Int) {
int int_array_temp[$4];
$$->array.intArray = int_array_temp;
}
else if($$->type == Real) {
double real_array_temp[$4];
$$->array.realArray = real_array_temp;
}
$$->lineno = lineno;
}
;
type_spec : INT { $$ = $1; }
| REAL { $$ = $1; }
| BOOL { $$ = $1; }
;
if_stmt : IF LPAREN bin_exp RPAREN stmt
{ $$ = newStmtNode(IfK);
$$->child[0] = $3;
$$->child[1] = $5;
$$->child[2] = NULL;
$$->lineno = lineno;
}
| IF LPAREN bin_exp RPAREN stmt ELSE stmt
{ $$ = newStmtNode(IfK);
$$->child[0] = $3;
$$->child[1] = $5;
$$->child[2] = $7;
$$->lineno = lineno;
}
;
while_stmt : WHILE LPAREN bin_exp RPAREN stmt
{ $$ = newStmtNode(WhileK);
$$->child[0] = $3;
$$->child[1] = $5;
$$->lineno = lineno;
}
;
assign_stmt : ID ASSIGN exp
{ $$ = newStmtNode(AssignK);
$$->attr.name = $1;
$$->child[0] = $3;
$$->child[1] = NULL;
$$->lineno = lineno;
}
| ID LBRACKET exp RBRACKET ASSIGN exp
{ $$ = newStmtNode(AssignK);
$$->attr.name = $1;
/* child[1]不为NULL 表示引用的变量是一个数组元素 */
/* child[0] 是数组元素的下标 */
$$->child[0] = $3;
$$->child[1] = $6;
$$->lineno = lineno;
}
;
read_stmt : READ LPAREN ID RPAREN
{ $$ = newStmtNode(ReadK);
$$->attr.name = $3;
$$->child[0] = NULL;
$$->lineno = lineno;
}
| READ LPAREN ID LBRACKET exp RBRACKET RPAREN
{ $$ = newStmtNode(ReadK);
$$->attr.name = $3;
$$->child[0] = $5;
$$->lineno = lineno;
}
;
write_stmt : WRITE LPAREN exp RPAREN
{ $$ = newStmtNode(WriteK);
$$->child[0] = $3;
$$->lineno = lineno;
}
;
exp : factor
{ $$ = $1; }
| bin_exp
{ $$ = $1; }
;
factor : INT_VALUE
{ $$ = newExpNode(IntValueK);
$$->value.int_val = $1;
$$->type = Int;
$$->lineno = lineno;
}
| REAL_VALUE
{ $$ = newExpNode(RealValueK);
$$->value.real_val = $1;
$$->type = Real;
$$->lineno = lineno;
}
| LPAREN exp RPAREN
{ $$ = $2; }
| ID
{ $$ = newExpNode(IdK);
$$->attr.name = $1;
/* child[0]为NULL代表该变量不是数组,用作解释时 */
$$->child[0] = NULL;
$$->lineno = lineno;
}
| ID LBRACKET exp RBRACKET
{ $$ = newExpNode(IdK);
$$->attr.name = $1;
$$->child[0] = $3;
$$->lineno = lineno;
}
| error { $$ = NULL; }
;
bin_exp : /* 关系运算符 */
exp REL_OP exp
{ $$ = newExpNode(OpK);
$$->child[0] = $1;
$$->child[1] = $3;
$$->attr.op = $2;
$$->lineno = lineno;
}
/* 算数运算符 */
| exp PLUS exp
{ $$ = newExpNode(OpK);
$$->child[0] = $1;
$$->child[1] = $3;
$$->attr.op = PLUS;
$$->lineno = lineno;
}
| exp SUB exp
{ $$ = newExpNode(OpK);
$$->child[0] = $1;
$$->child[1] = $3;
$$->attr.op = SUB;
$$->lineno = lineno;
}
| exp MUL exp
{ $$ = newExpNode(OpK);
$$->child[0] = $1;
$$->child[1] = $3;
$$->attr.op = MUL;
$$->lineno = lineno;
}
| exp DIV exp
{ $$ = newExpNode(OpK);
$$->child[0] = $1;
$$->child[1] = $3;
$$->attr.op = DIV;
$$->lineno = lineno;
}
| SUB exp %prec UMINUS
{ $$ = newExpNode(OpK);
$$->child[0] = $2;
$$->child[1] = NULL;
$$->attr.op = UMINUS;
$$->lineno = lineno;
}
;
%%
int yyerror(char * message)
{ fprintf(listing,"Syntax error at line %d: %s\n",lineno,message);
fprintf(listing,"Current token: %s",tokenString);
printToken(yychar);
Error = TRUE;
return 0;
}
/* 与主函数交互的语法分析函数 */
TreeNode * parse(void)
{ iniLexer();
yyparse();
return savedTree;
}
编译也非常简单,
#The makefile for interpreter based on Flex and Bison
CC = gcc
FLAG = -w
interpreter: main.o util.o cmm.tab.o cmm.lex.o symtab.o analyze.o
$(CC) $(FLAG) -o ../bin\&test/interpreter main.o util.o cmm.tab.o cmm.lex.o symtab.o analyze.o
main.o: main.c parse.h analyze.h
$(CC) $(FLAG) -c main.c
cmm.tab.o:cmm.tab.c parse.h
$(CC) $(FLAG) -c cmm.tab.c
cmm.tab.c:cmm.y
bison -d cmm.y
cmm.lex.o:cmm.lex.c cmm.tab.h
$(CC) $(FLAG) -c cmm.lex.c
cmm.lex.c:cmm.l
flex -o cmm.lex.c cmm.l
symtab.o:symtab.c symtab.h globals.h
$(CC) $(FLAG) -c symtab.c
analyze.o:analyze.c globals.h symtab.h analyze.h
$(CC) $(FLAG) -c analyze.c
util.o:util.c
$(CC) $(FLAG) -c util.c
clean:
-rm *.o
最后测试的时候,就是直接利用./interpreter [file_name]就可以了。
有兴趣的同学可以把这个项目下载下来,多看看,了解一下如果利用flex&bison快速实现一个脚本语言。