词法分析:
关键字定义:
/* This enum contains all the keywords and operators
* used in the language.
*/
enum
{
/* Keywords */
KW_BREAK = 1000, /* "break" keyword */
KW_CASE, /* "case" keyword */
KW_CONTINUE, /* "continue" keyword */
KW_DEFAULT, /* "default" keyword */
KW_DO, /* "do" keyword */
KW_ELSE, /* "else" keyword */
KW_EXTERN, /* "extern" keyword */
KW_GOTO, /* "goto" keyword */
KW_IF, /* "if" keyword */
KW_LABEL, /* "label" keyword */
KW_MODULE, /* "module" keyword */
KW_RETURN, /* "return"keyword */
KW_START, /* "start" keyword */
KW_SWITCH, /* "switch" keyword */
KW_WHILE, /* "while" keyword */
/* Type identifiers */
KW_BOOL, /* "bool" identifier */
KW_CHAR, /* "char" identifier */
KW_FLOAT, /* "float" identifier */
KW_INT, /* "int" identifier */
KW_UNTYPED, /* "untyped" identifier */
KW_VOID, /* "void" identifier */
/* Variable lexer tokens */
LIT_BOOL, /* bool constant */
LIT_CHAR, /* character constant */
LIT_FLOAT, /* floating point constant */
LIT_INT, /* integer constant */
LIT_STRING, /* string constant */
IDENTIFIER, /* identifier */
/* Operators */
OP_ADD, /* "+" */
OP_ASSIGN, /* "=" */
OP_BITWISE_AND, /* "&" */
OP_BITWISE_COMPLEMENT, /* "~" */
OP_BITWISE_LSHIFT, /* "<<" */
OP_BITWISE_OR, /* "|" */
OP_BITWISE_RSHIFT, /* ">>" */
OP_BITWISE_XOR, /* "^" */
OP_DIVIDE, /* "/" */
OP_EQUAL, /* "==" */
OP_GREATER, /* ">" */
OP_GREATEREQUAL, /* ">=" */
OP_LESS, /* "<" */
OP_LESSEQUAL, /* "<=" */
OP_LOGICAL_AND, /* "&&" */
OP_LOGICAL_OR, /* "||" */
OP_MODULUS, /* "%" */
OP_MULTIPLY, /* "*" */
OP_NOT, /* "!" */
OP_NOTEQUAL, /* "!=" */
OP_SUBTRACT, /* "-" */
OP_TERNARY_IF, /* "?" */
/* Delimiters */
ARROW, /* "->" */
LBRACE, /* "{" */
RBRACE, /* "}" */
LBRACKET, /* "[" */
RBRACKET, /* "]" */
COLON, /* ":" */
COMMA, /* "," */
LPAREN, /* "(" */
RPAREN, /* ")" */
SEMICOLON /* ";" */
}
tokens;
处理inger中的各种数据类型和标识符,如BOOL, unsigned long, float, char*,标识符等
typedef union
{
unsigned long uintvalue;
BOOL boolvalue;
char *stringvalue;
char charvalue;
float floatvalue;
char *identifier;
} Tokenvalue;
树节点结构(注意:树节点和抽象语法树节点是不同的):
typedef struct TreeNode
{
void *data;
int screenX;
struct TreeNode *parent;
List *children; //一系列孩子
} TreeNode;
抽象语法树节点结构:
typedef struct AstNode
{
int id; //表示节点的类型,如while节点,module节点
Tokenvalue val;
Type *type;
int lineno;
} AstNode;
抽象语法树的AstNode作为TreeNode的data成员保存,参考如下函数:
//参数id表示节点名,如:NODE_MODULE,NODE_GLOBAL等见nodenames.h
TreeNode *CreateAstNode( int id, int lineno )
{
TreeNode *treeNode;
AstNode *astNode;
astNode = (AstNode *) MallocEx( sizeof( AstNode ) );
astNode->id = id;
astNode->lineno = lineno;
astNode->val.uintvalue = 0;
astNode->type = NULL;
treeNode = CreateTreeNode( astNode );
return( treeNode );
}
或者:
TreeNode *CreateAstNodeVal( int id, Tokenvalue val, int lineno )
{
TreeNode *treeNode;
AstNode *astNode;
astNode = (AstNode *) MallocEx( sizeof( AstNode ) );
astNode->id = id;
astNode->lineno = lineno;
astNode->val = val;
astNode->type = NULL;
treeNode = CreateTreeNode( astNode );
return( treeNode );
}
//抽象语法树节点名
enum NodeNames
{
NODE_MODULE = 0,
NODE_START,
NODE_EXTERN,
NODE_GLOBAL,
NODE_FUNCTION,
NODE_FUNCTIONHEADER,
NODE_MODIFIERS,
NODE_PARAMLIST,
NODE_PARAMBLOCK,
NODE_PARAM,
NODE_RETURNTYPE,
NODE_DIMENSION,
NODE_DIMENSIONBLOCK,
NODE_BLOCK,
NODE_STATEMENT,
NODE_SWITCH,
NODE_CASES,
NODE_CASE,
NODE_WHILE,
NODE_GOTO,
NODE_LABEL,
NODE_IF,
NODE_IDENT,
NODE_RETURN,
NODE_CONTINUE,
NODE_BREAK,
NODE_DECLBLOCK,
NODE_DECLARATION,
NODE_INITIALIZER,
NODE_INDEXBLOCK,
NODE_REFERENCE,
NODE_INDEX,
NODE_EXPRESSION,
NODE_LOGICAL_OR,
NODE_LOGICAL_AND,
NODE_BITWISE_OR,
NODE_BITWISE_XOR,
NODE_BITWISE_AND,
NODE_EQUAL,
NODE_NOTEQUAL,
NODE_GREATER,
NODE_GREATEREQUAL,
NODE_LESS,
NODE_LESSEQUAL,
NODE_BITWISE_LSHIFT,
NODE_BITWISE_RSHIFT,
NODE_ASSIGN,
NODE_BINARY_ADD,
NODE_BINARY_SUBTRACT,
NODE_UNARY_ADD,
NODE_UNARY_SUBTRACT,
NODE_MULTIPLY,
NODE_DIVIDE,
NODE_MODULUS,
NODE_BITWISE_COMPLEMENT,
NODE_ADDRESS,
NODE_DEREFERENCE,
NODE_NOT,
NODE_APPLICATION,
NODE_INDEXER,
NODE_ARGUMENTS,
NODE_FACTOR,
NODE_BOOL,
NODE_CHAR,
NODE_FLOAT,
NODE_INT,
NODE_UNTYPED,
NODE_VOID,
NODE_LIT_BOOL,
NODE_LIT_CHAR,
NODE_LIT_FLOAT,
NODE_LIT_INT,
NODE_LIT_STRING,
NODE_LIT_IDENTIFIER,
NODE_INT_TO_FLOAT,
NODE_CHAR_TO_INT,
NODE_CHAR_TO_FLOAT,
NODE_UNKNOWN = -1
};
输出抽象语法树:
/*
* PRINTING ROUTINES
*/
void PrintAst( TreeNode *source )
{
PrintTree( source, GetAstNodeData, 4 );
}
PrintTree的实现如下,这里传递的参数levels等于4
void PrintTree( TreeNode *source, DataFunction dataFunction, int levels )
{
int printDepth = 0;
BOOL loop;
char *str;
int i;
/* TODO: We're going to have to make a new macro.
* Don't use DEBUG for this.
*/
DEBUG( "Called\n" );
/* If tree is empty, abort. */
if( source == NULL )
{
return;
}
/* Walk through tree to determine x-offsets for
* each node.
*/
LayoutTree( source, LEFT_OFFSET );
/* Print nodes. */
依次通过调用函数指针dataFunction所指向的函数(实际上是函数GetAstNodeData)
来输出每个节点的节点名,节点的token的值,类型名,以及行号,参考下面的GetAstNodeData函数
for( i = 0; i < levels; i++ )
{
str = dataFunction( source, i );
PrintChars( source->screenX - strlen( str ) / 2, ' ' );
printf( "%s\n", str );
}
PrintChars( source->screenX, ' ' );
printf( "%c", VERTBAR );
printDepth = 0;
do
{
currentX = 0;
printf("\n");
PrintNode( source, 0, printDepth, 0, dataFunction, 0 );
currentX = 0;
printf("\n");
PrintNode( source, 0, printDepth, 1, dataFunction, 0 );
currentX = 0;
printf("\n");
for( i = 0; i < levels; i++ )
{
PrintNode( source, 0, printDepth, 2, dataFunction, i );
currentX = 0;
printf("\n");
}
loop = PrintNode( source, 0, printDepth, 3, dataFunction, 0 );
printDepth++;
}
while( loop );
}
添加一个孩子的操作只需要将新节点插入到孩子链表尾部即可
void AddTreeChild( TreeNode *parentnode, TreeNode *node )
{
/* Do not act on an empty node. */
if( node == NULL ) return;
/* If the tree is empty, add the first root node. */
if( parentnode == NULL )
{
node->parent = NULL;
}
else
/* Tree is not empty. Add the new node to [parentnode]'s
* children list. */
{
node->parent = parentnode;
ListAppend( parentnode->children, node );
}
}
而RemoveAstNode则删除整个子树
/* Remove node [node] from ast. The node contents
* and its children get deleted.
*
* Pre: [node] is not NULL.
*/
void RemoveAstNode( TreeNode *node );
inger编译流程:
1. 预处理,关键函数为Preprocess
2. 语法分析,构建抽象语法树,关键函数Parse,Parse也是构造抽象语法树的入口函数,如果语法分析没有发现错误则跳到第3步
3. 根据抽象语法树来建立符号表关键函数为CreateSymbolTable( ast );
4. 语义分析,关键函数:
a) CheckLeftValues( ast );
b) CheckArgCount( ast );
c) CheckSwitchStatements( ast );
d) CheckFunctionReturns( ast );
5. 根据抽象语法树生成代码,关键函数GenerateCode( ast );