%{
/* Lex specification of the TINY scanner. */
#include "globals.h"
#include "util.h"
#include "scan.h"
/* lexeme of identifier or reserved word */
char tokenString[MAXTOKENLEN+1];
%}

digit       [0-9]
number      {digit}+
letter      [a-zA-Z]
identifier  {letter}+
newline     \n
whitespace  [ \t]+

%%

"if"            {return IF;}
"then"          {return THEN;}
"else"          {return ELSE;}
"end"           {return END;}
"repeat"        {return REPEAT;}
"until"         {return UNTIL;}
"read"          {return READ;}
"write"         {return WRITE;}
":="            {return ASSIGN;}
"="             {return EQ;}
"<"             {return LT;}
"+"             {return PLUS;}
"-"             {return MINUS;}
"*"             {return TIMES;}
"/"             {return OVER;}
"("             {return LPAREN;}
")"             {return RPAREN;}
";"             {return SEMI;}
{number}        {return NUM;}
{identifier}    {return ID;}
{newline}       {lineno++;}
{whitespace}    {/* skip whitespace */}
"{"             { /* Skip a { ... } comment, still counting lines.
                   * c is an int so the end-of-file value is never
                   * confused with a real character (a plain char may
                   * be unsigned, in which case c == EOF is never true).
                   * POSIX lex specifies 0 at end-of-file while some
                   * flex versions return EOF, so both end the comment.
                   */
                  int c;
                  do
                  { c = input();
                    if (c == EOF || c == 0) break;
                    if (c == '\n') lineno++;
                  } while (c != '}');
                }
.               {return ERROR;}

%%

/* Return the next token from the source file.
 * On the first call, the scanner is attached to `source` (input) and
 * `listing` (output) and lineno is incremented once.
 * The lexeme is copied into tokenString, truncated to MAXTOKENLEN
 * characters. When TraceScan is set, the token is echoed to the listing.
 */
TokenType getToken(void)
{ static int firstTime = TRUE;
  TokenType currentToken;
  if (firstTime)
  { firstTime = FALSE;
    lineno++;
    yyin = source;
    yyout = listing;
  }
  currentToken = yylex();
  strncpy(tokenString,yytext,MAXTOKENLEN);
  /* strncpy does not terminate on truncation; do it explicitly */
  tokenString[MAXTOKENLEN] = '\0';
  if (TraceScan) {
    fprintf(listing,"\t%d: ",lineno);
    printToken(currentToken,tokenString);
  }
  return currentToken;
}
%{
/* Yacc specification of the TINY parser: C prologue. */

/* distinguishes Yacc output from other code files */
#define YYPARSER

#include "globals.h"
#include "util.h"
#include "scan.h"
#include "parse.h"

/* every grammar symbol carries a syntax-tree node pointer */
#define YYSTYPE TreeNode *

/* identifier name captured by the mid-rule action of assign_stmt */
static char * savedName;
/* line number captured together with savedName */
static int savedLineNo;
/* root of the syntax tree, assigned by the `program` rule */
static TreeNode * savedTree;
%}

%token IF THEN ELSE END REPEAT UNTIL READ WRITE
%token ID NUM
%token ASSIGN EQ LT PLUS MINUS TIMES OVER LPAREN RPAREN SEMI
%token ERROR
%%
/* Grammar for TINY.
 * Every action builds a syntax-tree node; statements in a sequence are
 * chained through their `sibling` pointers.
 */

program     : stmt_seq
                 { savedTree = $1; }
            ;

stmt_seq    : stmt_seq SEMI stmt
                 { /* append $3 at the end of the sibling chain of $1 */
                   YYSTYPE tail = $1;
                   if (tail == NULL)
                     $$ = $3;
                   else
                   { for ( ; tail->sibling != NULL; tail = tail->sibling)
                       ;
                     tail->sibling = $3;
                     $$ = $1;
                   }
                 }
            | stmt
                 { $$ = $1; }
            ;

stmt        : if_stmt      { $$ = $1; }
            | repeat_stmt  { $$ = $1; }
            | assign_stmt  { $$ = $1; }
            | read_stmt    { $$ = $1; }
            | write_stmt   { $$ = $1; }
            | error        { $$ = NULL; }
            ;

if_stmt     : IF exp THEN stmt_seq END
                 { /* child[0] = condition, child[1] = then-part */
                   $$ = newStmtNode(IfK);
                   $$->child[0] = $2;
                   $$->child[1] = $4;
                 }
            | IF exp THEN stmt_seq ELSE stmt_seq END
                 { /* child[2] additionally holds the else-part */
                   $$ = newStmtNode(IfK);
                   $$->child[0] = $2;
                   $$->child[1] = $4;
                   $$->child[2] = $6;
                 }
            ;

repeat_stmt : REPEAT stmt_seq UNTIL exp
                 { /* child[0] = body, child[1] = exit condition */
                   $$ = newStmtNode(RepeatK);
                   $$->child[0] = $2;
                   $$->child[1] = $4;
                 }
            ;

assign_stmt : ID
                 { /* mid-rule action: remember the identifier and its
                      line before the rest of the statement is parsed */
                   savedName = copyString(tokenString);
                   savedLineNo = lineno;
                 }
              ASSIGN exp
                 { $$ = newStmtNode(AssignK);
                   $$->attr.name = savedName;
                   $$->lineno = savedLineNo;
                   $$->child[0] = $4;
                 }
            ;

read_stmt   : READ ID
                 { $$ = newStmtNode(ReadK);
                   $$->attr.name = copyString(tokenString);
                 }
            ;

write_stmt  : WRITE exp
                 { $$ = newStmtNode(WriteK);
                   $$->child[0] = $2;
                 }
            ;

exp         : simple_exp LT simple_exp
                 { $$ = newExpNode(OpK);
                   $$->attr.op = LT;
                   $$->child[0] = $1;
                   $$->child[1] = $3;
                 }
            | simple_exp EQ simple_exp
                 { $$ = newExpNode(OpK);
                   $$->attr.op = EQ;
                   $$->child[0] = $1;
                   $$->child[1] = $3;
                 }
            | simple_exp
                 { $$ = $1; }
            ;

simple_exp  : simple_exp PLUS term
                 { $$ = newExpNode(OpK);
                   $$->attr.op = PLUS;
                   $$->child[0] = $1;
                   $$->child[1] = $3;
                 }
            | simple_exp MINUS term
                 { $$ = newExpNode(OpK);
                   $$->attr.op = MINUS;
                   $$->child[0] = $1;
                   $$->child[1] = $3;
                 }
            | term
                 { $$ = $1; }
            ;

term        : term TIMES factor
                 { $$ = newExpNode(OpK);
                   $$->attr.op = TIMES;
                   $$->child[0] = $1;
                   $$->child[1] = $3;
                 }
            | term OVER factor
                 { $$ = newExpNode(OpK);
                   $$->attr.op = OVER;
                   $$->child[0] = $1;
                   $$->child[1] = $3;
                 }
            | factor
                 { $$ = $1; }
            ;

factor      : LPAREN exp RPAREN
                 { $$ = $2; }
            | NUM
                 { $$ = newExpNode(ConstK);
                   $$->attr.val = atoi(tokenString);
                 }
            | ID
                 { $$ = newExpNode(IdK);
                   $$->attr.name = copyString(tokenString);
                 }
            | error
                 { $$ = NULL; }
            ;
%%

/* Error callback invoked by the generated parser.
 * Writes the line number, the parser's message and the current
 * lookahead token (yychar) to the listing, and sets the global
 * Error flag. Always returns 0.
 * NOTE(review): no yylex() wrapper or parse() entry point is visible
 * in this section - presumably they are defined elsewhere; confirm.
 */
int yyerror(char * message)
{ fprintf(listing,"Syntax error at line %d: %s\n",lineno,message);
  fprintf(listing,"Current token: ");
  printToken(yychar,tokenString);
  Error = TRUE;
  return 0;
}
{ Sample program
  in TINY language -
  computes factorial
}
read x; { input an integer }
if 0 < x then { don't compute if x <= 0 }
  fact := 1;
  repeat
    fact := fact * x;
    x := x - 1
  until x = 0;
  write fact  { output factorial of x }
end
/* Token categories. */
typedef enum {
    /* end of input and scanner error */
    ENDFILE,
    ERROR,
    /* reserved words */
    IF,
    THEN,
    ELSE,
    END,
    REPEAT,
    UNTIL,
    READ,
    WRITE,
    /* identifiers and numbers */
    ID,
    NUM,
    /* operators and punctuation */
    ASSIGN,
    EQ,
    LT,
    PLUS,
    MINUS,
    TIMES,
    OVER,
    LPAREN,
    RPAREN,
    SEMI
} TokenType;

/* Node category: statement node or expression node. */
typedef enum {
    StmtK,
    ExpK
} NodeKind;

/* Statement kinds: if, repeat, assign, read and write statements. */
typedef enum {
    IfK,
    RepeatK,
    AssignK,
    ReadK,
    WriteK
} StmtKind;

/* Expression kinds. */
typedef enum {
    OpK,
    ConstK,
    IdK
} ExpKind;

/* Expression type tag (presumably for type checking - confirm). */
typedef enum {
    Void,
    Integer,
    Boolean
} ExpType;
#define MAXCHILDREN 3 typedef struct treeNode { struct treeNode * child[MAXCHILDREN]; /*子孩子指针数组*/ struct treeNode * sibling; /*右兄弟指针*/ int lineno; NodeKind nodekind; union { StmtKind stmt; ExpKind exp; } kind; union { TokenType op; int val; char * name; } attr; ExpType type; } TreeNode; |
然后再看一下词法扫描程序: SCAN.C
SCAN.C /*定义的状态*/ typedef enum { START, /*初始状态*/ INASSIGN, /*进入到赋值状态*/ INCOMMENT, /*进入到注释状态*/ INNUM, /*进入到数字状态*/ INID, /*进入到标志符状态*/ DONE /*状态结束*/ }StateType; /*每当语法分析程序需要一个单词时,就调用该子程序,得到 (类别码,单词的值)*/ TokenType getToken(void) { int tokenStringIndex = 0; TokenType currentToken; StateType state = START; int save; while (state != DONE) { int c = getNextChar(); /*从输入buf中读入一个字符*/ save = TRUE; switch (state) { case START: if (isdigit(c)) state = INNUM; else if (isalpha(c)) /*判断字母*/ state = INID; else if (c == ':') state = INASSIGN; else if ((c == ' ') || (c == '/t') || (c == '/n')) save = FALSE; else if (c == '{') { save = FALSE; state = INCOMMENT; } else { state = DONE; switch (c) { case EOF: save = FALSE; currentToken = ENDFILE; break; case '=': currentToken = EQ; break; case '<': currentToken = LT; break; case '+': currentToken = PLUS; break; case '-': currentToken = MINUS; break; case '*': currentToken = TIMES; break; case '/': currentToken = OVER; break; case '(': currentToken = LPAREN; break; case ')': currentToken = RPAREN; break; case ';': currentToken = SEMI; break; default: currentToken = ERROR; break; } } break; case INCOMMENT: save = FALSE; if (c == EOF) { state = DONE; currentToken = ENDFILE; } else if (c == '}') state = START; break; case INASSIGN: state = DONE; if (c == '=') currentToken = ASSIGN; else { /* backup in the input */ ungetNextChar(); save = FALSE; currentToken = ERROR; } break; case INNUM: if (!isdigit(c)) { /* backup in the input */ ungetNextChar(); save = FALSE; state = DONE; currentToken = NUM; } break; case INID: if (!isalpha(c)) { /* backup in the input */ ungetNextChar(); save = FALSE; state = DONE; currentToken = ID; } break; case DONE: default: /* should never happen */ fprintf(listing,"Scanner Bug: state= %d/n",state); state = DONE; currentToken = ERROR; break; } if ((save) && (tokenStringIndex <= MAXTOKENLEN)) { tokenString[tokenStringIndex++] = (char) c; } /*解析单词结束*/ if (state == DONE) { tokenString[tokenStringIndex] = '/0'; if 
(currentToken == ID) { currentToken = reservedLookup(tokenString); } } } if (TraceScan) { fprintf(listing,"/t%d: ",lineno); printToken(currentToken,tokenString); } return currentToken; } |
/* Current lookahead token, shared by all parsing routines in this file. */
static TokenType token;

/* Forward declarations of the recursive-descent routines. */

/* statements */
static TreeNode * stmt_sequence(void);
static TreeNode * statement(void);
static TreeNode * if_stmt(void);
static TreeNode * repeat_stmt(void);
static TreeNode * assign_stmt(void);
static TreeNode * read_stmt(void);
static TreeNode * write_stmt(void);

/* expressions */
static TreeNode * exp(void);
static TreeNode * simple_exp(void);
static TreeNode * term(void);
static TreeNode * factor(void);
static void syntaxError(char * message) { fprintf(listing,"/n>>> "); fprintf(listing,"Syntax error at line %d: %s",lineno,message); Error = TRUE; } static void match(TokenType expected) { //匹配分出的单词,如果匹配的话,取下一个单词 if (token == expected) { token = getToken(); } else { syntaxError("unexpected token -> "); printToken(token,tokenString); fprintf(listing," "); } } TreeNode * stmt_sequence(void) { //形成一棵以第一条语句开始的参数语法树 TreeNode * t = statement(); TreeNode * p = t; while ((token!=ENDFILE) && (token!=END) && (token!=ELSE) && (token!=UNTIL)) { TreeNode * q; match(SEMI); q = statement(); if (q!=NULL) { if (t==NULL) t = p = q; else { p->sibling = q; //下一个语句是右兄弟结点,形成同一层级 p = q; } } } return t; } TreeNode * statement(void) { //对五种语句类型分别处理 TreeNode * t = NULL; switch (token) { case IF : t = if_stmt(); break; case REPEAT : t = repeat_stmt(); break; case ID : t = assign_stmt(); break; case READ : t = read_stmt(); break; case WRITE : t = write_stmt(); break; default : syntaxError("unexpected token -> "); printToken(token,tokenString); token = getToken(); break; } /* end case */ return t; } TreeNode * if_stmt(void) { //对文法:IF exp THEN stmt_seq END //IF exp THEN stmt_seq ELSE stmt_seq END //的处理 TreeNode * t = newStmtNode(IfK); match(IF); if (t!=NULL) t->child[0] = exp(); match(THEN); if (t!=NULL) t->child[1] = stmt_sequence(); if (token==ELSE) { match(ELSE); if (t!=NULL) t->child[2] = stmt_sequence(); } match(END); return t; } TreeNode * repeat_stmt(void) { //对文法:REPEAT stmt_seq UNTIL exp //的处理 TreeNode * t = newStmtNode(RepeatK); match(REPEAT); if (t!=NULL) t->child[0] = stmt_sequence(); match(UNTIL); if (t!=NULL) t->child[1] = exp(); return t; } TreeNode * assign_stmt(void) { //对赋值语句的处理 //文法:ID ASSIGN exp TreeNode * t = newStmtNode(AssignK); if ((t!=NULL) && (token==ID)) { t->attr.name = copyString(tokenString); } match(ID); match(ASSIGN); if (t!=NULL) t->child[0] = exp(); return t; } TreeNode * read_stmt(void) { //对文法: READ ID //的处理 TreeNode * t = newStmtNode(ReadK); match(READ); if 
((t!=NULL) && (token==ID)) t->attr.name = copyString(tokenString); match(ID); return t; } TreeNode * write_stmt(void) { //对文法: WRITE exp //的处理 TreeNode * t = newStmtNode(WriteK); match(WRITE); if (t!=NULL) t->child[0] = exp(); return t; } TreeNode * exp(void) { //对文法:exp: simple_exp LT simple_exp // | simple_exp EQ simple_exp // | simple_exp //的处理 //先生成了左边的子表达式 TreeNode * t = simple_exp(); if ((token==LT)||(token==EQ)) { TreeNode * p = newExpNode(OpK); //操作符表达式结点 if (p!=NULL) { p->child[0] = t; p->attr.op = token; //操作符类型 t = p; //t是需要返回的 } match(token); if (t!=NULL) { t->child[1] = simple_exp(); //在生成右边的子表达式 } } return t; } TreeNode * simple_exp(void) { //对文法:simple_exp : simple_exp PLUS term // | simple_exp MINUS term // | term //的处理 //先生成term项 TreeNode * t = term(); while ((token==PLUS)||(token==MINUS)) { TreeNode * p = newExpNode(OpK); //同样是构造操作符表达式结点 if (p!=NULL) { p->child[0] = t; p->attr.op = token; t = p; match(token); t->child[1] = term(); } } return t; } TreeNode * term(void) { //对文法: term : term TIMES factor // | term OVER factor // | factor //的处理 TreeNode * t = factor(); while ((token==TIMES)||(token==OVER)) { TreeNode * p = newExpNode(OpK); //同样的处理方法 if (p!=NULL) { p->child[0] = t; p->attr.op = token; t = p; match(token); p->child[1] = factor(); } } return t; } TreeNode * factor(void) { //对文法: factor : LPAREN exp RPAREN // | NUM // | ID //的处理 //判断单词的类型 TreeNode * t = NULL; switch (token) { case NUM : t = newExpNode(ConstK); if ((t!=NULL) && (token==NUM)) t->attr.val = atoi(tokenString); match(NUM); break; case ID : t = newExpNode(IdK); if ((t!=NULL) && (token==ID)) t->attr.name = copyString(tokenString); match(ID); break; case LPAREN : match(LPAREN); t = exp(); match(RPAREN); break; default: syntaxError("unexpected token -> "); printToken(token,tokenString); token = getToken(); break; } return t; } TreeNode * parse(void) { TreeNode * t; token = getToken(); t = stmt_sequence(); if (token!=ENDFILE) syntaxError("Code ends before file/n"); return t; } |
/* Driver of the TINY compiler front end.
 * Usage: <program> <filename>
 * ".tny" is appended when the file name contains no '.'. The file is
 * parsed and, when TraceParse is set, the syntax tree is printed to
 * the listing (stdout).
 * Returns 0 on success; exits with status 1 on a usage error, an
 * over-long file name or a file that cannot be opened.
 */
int main( int argc, char * argv[] )
{
    TreeNode * syntaxTree;
    char pgm[120]; /* source code file name */

    if (argc != 2)
    {
        fprintf(stderr,"usage: %s <filename>\n",argv[0]);
        exit(1);
    }
    /* argv[1] comes from the user: make sure the name plus a possible
     * ".tny" suffix and the terminator fit into pgm before copying */
    if (strlen(argv[1]) + sizeof ".tny" > sizeof pgm)
    {
        fprintf(stderr,"File name too long: %s\n",argv[1]);
        exit(1);
    }
    strcpy(pgm,argv[1]);
    if (strchr(pgm, '.') == NULL)
    {
        strcat(pgm,".tny");
    }
    source = fopen(pgm,"r");
    if (source==NULL)
    {
        fprintf(stderr,"File %s not found\n",pgm);
        exit(1);
    }
    listing = stdout;
    fprintf(listing,"\nTINY COMPILATION: %s\n",pgm);
    syntaxTree = parse(); /* calls the routine in PARSE.C */
    if (TraceParse)
    {
        fprintf(listing,"\nSyntax tree:\n");
        printTree(syntaxTree);
    }
    fclose(source);
    return 0;
}