!!!说明,本文源码参考github:https://github.com/TaronLedgerS/MyCompiler,其中本人对于语法分析器改动了一小部分,另外由于作者水平有限,学不会windows界面编程,故语义分析和绘图模块是用python实现的,由于c++和python都是面向对象语言,故将词法分析,语法分析模块转成等价python语言应该不会很难。作者认为,词法分析和语法分析这两部分是核心,只要掌握了语法分析树的创建,那么用任何语言提供的第三方库去实现画图都是可以的。
1.定义了记号种类、记号类和词法分析器类
1.记号种类Token_Type:
ORIGIN,SCALE,ROT,IS,FOR,FROM,TO,STEP,DRAW, //关键字
T, //参数
SEMICO,L_BRACKET,R_BRACKET,COMMA,//分隔符
PLUS,MINUS,MUL,DIV,POWER,//运算符
FUNC,//函数
CONST_ID,//常量
NONTOKEN,ERRTOKEN//空符号,出错符
2.记号类
Token_Type type,//记号种类
char *lexeme,//记号原始串
double value,//常数的值
double (*func_ptr)(double)//函数指针
3.词法分析器类 MyScanner
属性:
FILE * InFile; //输入文件流
unsigned int LineNo; //记号所在行号
char TokenStr[Token_Maxlen]; //记号缓冲区
方法:
MyScanner() { LineNo = 1; };
~MyScanner() {};
int InitScanner(const char *FileName); //初始化
void CloseScanner(); //关闭
unsigned int GetLineNo(); //获取记号所在行号
Token GetToken(); //获取记号
char GetChar(); //从输入读入一个字符
void BackChar(char NextChar); //把字符回退到输入中
void AddToTokenStr(char NextChar); //识别到的记号加入到记号缓冲中
Token InTokenTable(const char *c_str); //判断是否在符号表中
void ClearTokenStr(); //清空记号缓冲
========================================================
1.符号表数组TokenTable[],每个元素都是记号类
//获取文件输入流初始化InFile
InitScanner(const char * FileName)
//关闭文件
CloseScanner()
//返回当前行号
unsigned int MyScanner::GetLineNo()
//从文件输入流中读取一个字符并转为大写
char MyScanner::GetChar()
//将字符退回到文件输入流中
void MyScanner::BackChar(char NextChar)
//加入字符到记号缓冲区中
void MyScanner::AddToTokenStr(char NextChar)
//清空记号串
void MyScanner::ClearTokenStr()
//判断给定字符串是否在符号表中
Token MyScanner::InTokenTable(const char * c_str)
//识别一个记号【1.字母---保留字、参数,PI, E 2.数字--常量 3.运算符】
Token MyScanner::GetToken()
//a.先过滤掉空白字符和换行,成功读入第一个字符并加入记号缓冲区;
//b.如果第一个字符是字母,则可能是关键字,参数,PI,E,读入完成后,看该原始记号串是否在符号表中,存在返回该记号,不存在返回错误记号ErrToken;
//c.如果第一个字符是数字,则只能是数(也包含小数),当读入到小数点时,可接着从缓冲区中读小数部分;
//d.还可能读入的是运算符,这里读入-可能是减号或注释,/可能是除号或注释,*可能是乘号或幂运算,其他则是出错记号ErrToken;
// Node of an expression syntax tree.
// OpCode holds the kind of token the node was built from and selects which
// member of `content` is meaningful.
struct TreeNodeStruct {
    enum Token_Type OpCode;
    union {
        // binary-operator node: two operand subtrees
        struct {
            TreeNodeStruct *left, *right;
        } CaseOp;
        // function-call node: argument subtree plus the function to apply
        struct {
            TreeNodeStruct *child;
            func_ptr mathfuncptr;
        } CaseFunc;
        // constant node: the literal value
        double CaseConst;
        // parameter node: pointer to the variable holding the parameter value
        double *CasePara;
    } content;
};

// Tree nodes are handled through this pointer alias.
typedef TreeNodeStruct *TreeNode;
// Parser for the function-drawing language: keeps the current token and the
// drawing parameters (origin, scale, rotation, FOR range) and declares one
// routine per grammar nonterminal plus tracing/error hooks.
// NOTE(review): the closing `};` of this class is not visible in this excerpt.
class MyParser {
private:
double TPara; // value of the parameter T
Token token; // current token
double Origin_x=0,Origin_y=0; // horizontal / vertical translation distances
double Scale_x=1,Scale_y=1; // horizontal / vertical scale factors
double Rot_angle=0; // rotation angle
double Start,End,Step; // start, end and step of the FOR statement
MyScanner Scanner;
int indent;// used for indentation
public:
MyParser();
~MyParser();
// helper routines
void FetchToken(); // fetch a token: calls the scanner's GetToken and stores the result as the current token
void MatchToken(enum Token_Type ttoken); // match a token
void MatchToken(enum Token_Type ttoken, char * text);
void SyntaxError(int case_of); // syntax-error handling; calls ErrorMsg
virtual void ErrorMsg(int line, char * sourcetext, char* descrip); //MySemantics
TreeNode MakeTreeNode(enum Token_Type opcode, ...); // build a syntax-tree node
void PrintSyntaxTree(TreeNode root, int indent); // print a syntax tree
double GetExprValue(TreeNode root);// compute the value of an expression
void InitParser(char *FileName);// parser entry point // below: recursive subroutines for the nonterminals, built from the grammar (original comment says "BNCF", presumably EBNF)
// The recursive subroutines of the main productions fall into two groups.
// The statement part of the language (for, origin, rot, scale statements) is only
// syntax-checked and needs no syntax tree, so these functions return void.
void Program();
void Statement();
virtual void ForStatement();
virtual void OriginStatement();
virtual void RotStatement();
virtual void ScaleStatement();
// Parsing an expression also builds its syntax tree, so these functions return
// a pointer to a syntax-tree node.
TreeNode Expression();
TreeNode Term();
TreeNode Factor();
TreeNode Component();
TreeNode Atom();
virtual void Enter(char* x);// entering a statement
virtual void Back(char*x);// leaving a statement
virtual void CallMatch(char*x);// matching a terminal
virtual void TreeTrace(TreeNode x);// trace of a tree
void passToSemantics();
origin is (100,300); rot is 0; scale is (1,1); for T from 0 to 200 step 1 draw (t,0);
G1
Program-->ε| Program Statement SEMICO
Statement-->OriginStatement|ScaleStatement|RotStatement|ForStatement
//origin is (100,300);
OriginStatement-->ORIGIN IS L_BRACKET Expression COMMA Expression R_BRACKET
//scale is (1,1)
ScaleStatement-->SCALE IS L_BRACKET Expression COMMA Expression R_BRACKET
//rot is 0;
RotStatement-->ROT IS Expression
//for T from 0 to 200 step 1 draw (t,0);
ForStatement-->FOR T FROM Expression TO Expression STEP Expression
DRAW L_BRACKET Expression COMMA Expression R_BRACKET
Expression-->Expression PLUS Expression //a+b
|Expression MINUS Expression //a-b
|Expression MUL Expression //a*b
|Expression DIV Expression //a/b
|PLUS Expression //+b
|MINUS Expression //-b
|Expression POWER Expression //a**b
|CONST_ID //常数
|T
|FUNC L_BRACKET Expression R_BRACKET //sin(t)
|L_BRACKET Expression R_BRACKET //(a)
====================================================================================
G2 (G1中Expression产生式是二义的,因为它没有区分运算的优先级和结合性,即所有的候选项均具有同等的指导分析的权利,从而造成有些分析步骤的不确定性)
改写文法,根据运算的优先级将Expression的所有候选项分组,具有相同优先级的候选项被分在同一个组里,并为每一个组引入一个非终结符
从上至下,优先级依次升高
表达式中的运算 | 结合性 | 非终结符 |
---|---|---|
(二元)PLUS,MINUS | 左结合 | Expression |
MUL,DIV | 左结合 | Term |
(一元) PLUS,MINUS | 右结合 | Factor |
POWER | 右结合 | Component |
(原子表达式)(常数,参数,函数,括号) | 无 | Atom |
Program-->ε| Program Statement SEMICO
Statement-->OriginStatement|ScaleStatement|RotStatement|ForStatement
//origin is (100,300);
OriginStatement-->ORIGIN IS L_BRACKET Expression COMMA Expression R_BRACKET
//scale is (1,1)
ScaleStatement-->SCALE IS L_BRACKET Expression COMMA Expression R_BRACKET
//rot is 0;
RotStatement-->ROT IS Expression
//for T from 0 to 200 step 1 draw (t,0);
ForStatement-->FOR T FROM Expression TO Expression STEP Expression
DRAW L_BRACKET Expression COMMA Expression R_BRACKET
Expression-->Expression PLUS Term
|Expression MINUS Term
|Term
Term-->Term MUL Factor
|Term DIV Factor
|Factor
Factor-->PLUS Factor
|MINUS Factor
|Component
Component-->Atom POWER Component
|Atom
Atom-->CONST_ID
|T
|FUNC L_BRACKET Expression R_BRACKET
|L_BRACKET Expression R_BRACKET
====================================================================================
G3(G2中Program,Expression,Term含有左递归,G3是消除左递归和左因子的文法)
Program-->Statement SEMICO Program|ε
Expression-->Term Expression`
Expression`--> PLUS Term Expression`|MINUS Term Expression`|ε
Term-->Factor Term`
Term`-->MUL Factor Term`|DIV Factor Term`|ε
====================================================================================
G4(编写适合递归下降子程序的文法,即EBNF形式) 其中{}表示重复0次或若干次,[]表示可选(可被绕过),|表示或,()表示分组
例如:Expression`--> PLUS Term Expression`|MINUS Term Expression`|ε
==> (PLUS|MINUS) Term Expression`| ε
Expression` ==> {(PLUS|MINUS) Term}
Program-->{Statement SEMICO} 重复0次或若干次
Statement-->OriginStatement|ScaleStatement|RotStatement|ForStatement
//origin is (100,300);
OriginStatement-->ORIGIN IS L_BRACKET Expression COMMA Expression R_BRACKET
//scale is (1,1)
ScaleStatement-->SCALE IS L_BRACKET Expression COMMA Expression R_BRACKET
//rot is 0;
RotStatement-->ROT IS Expression
//for T from 0 to 200 step 1 draw (t,0);
ForStatement-->FOR T FROM Expression TO Expression STEP Expression
DRAW L_BRACKET Expression COMMA Expression R_BRACKET
Expression-->Term {(PLUS|MINUS) Term}
Term-->Factor {(MUL|DIV) Factor}
Factor-->PLUS Factor
|MINUS Factor
|Component
//保持产生式的右递归形式,在程序中通过递归调用实现右边的运算符的先计算
Component-->Atom POWER Component
|Atom
Atom-->CONST_ID
|T
|FUNC L_BRACKET Expression R_BRACKET
|L_BRACKET Expression R_BRACKET
======================================================================================
表达式的语法树
1.语法树的节点可以分为以下三类:
叶节点:存放原子表达式,(如常数或参数T)
两个孩子的内部节点:用于存放二元运算构成的表达式。为了简化处理,将1元加退化为叶节点(+5-->5),1元减转化为左操作数为0.0的二元减(-5-->0.0-5)
一个孩子的内部节点:用于存放函数调用如sin(t)等构成的表达式
例如表达式 -16+5**3/cos(T) 的语法树(按层次列出):
第1层(根): +
第2层: -(+的左孩子) /(+的右孩子)
第3层: 0.0 16(-的左、右孩子) ** cos(/的左、右孩子)
第4层: 5 3(**的左、右孩子) T(cos的孩子)
========================================================
生成语法树节点
//生成语法树节点 四类情况
// Allocates one syntax-tree node of kind `opcode` and fills it from the
// variadic arguments. Expected extra arguments, by kind:
//   CONST_ID  : (double value)
//   T         : none; the node points at this parser's TPara member
//   FUNC      : (func_ptr function, TreeNode child) -- read in this order
//   otherwise : (TreeNode left, TreeNode right) for a binary operator
// Returns the newly allocated node; nothing in this function releases it.
TreeNode MyParser::MakeTreeNode(Token_Type opcode, ...) {
    TreeNode node = new TreeNodeStruct;
    node->OpCode = opcode;

    va_list args;
    va_start(args, opcode);
    if (opcode == CONST_ID) {
        // constant leaf
        node->content.CaseConst = va_arg(args, double);
    } else if (opcode == T) {
        // parameter leaf: shares the parser-wide TPara storage
        node->content.CasePara = &TPara;
    } else if (opcode == FUNC) {
        // function call: function pointer first, then the argument subtree
        node->content.CaseFunc.mathfuncptr = va_arg(args, func_ptr);
        node->content.CaseFunc.child = va_arg(args, TreeNode);
    } else {
        // binary operator: left operand first, then right operand
        node->content.CaseOp.left = va_arg(args, TreeNode);
        node->content.CaseOp.right = va_arg(args, TreeNode);
    }
    va_end(args);

    return node;
}
根据EBNF所得的递归子程序
//根据EBNF所得的递归子程序
// Program -> { Statement ; }
// Parses statements, each followed by a semicolon, until the current token
// is NONTOKEN.
void MyParser::Program() {
    for (;;) {
        if (token.type == NONTOKEN) {
            return;
        }
        Statement();
        MatchToken(SEMICO, ";");
    }
}
void MyParser::Statement() { //Statement->OriginStatement|ScaleStatement|RotStatement|ForStatement 候选项之间的或关系,并列路径,用case语句
switch (token.type) {
case ORIGIN:OriginStatement(); break;
case SCALE:ScaleStatement(); break;
case ROT:RotStatement(); break;
case FOR:ForStatement(); break;
default: SyntaxError(2); break;
}
}
void MyParser::OriginStatement() {
//OriginStatment → ORIGIN IS
// L_BRACKET Expression COMMA Expression R_BRACKET
TreeNode tmp = NULL;
MatchToken(ORIGIN, "ORIGIN");
MatchToken(IS, "IS");
MatchToken(L_BRACKET, "(");
tmp = Expression(); TreeTrace(tmp); //先序打印语法分析树;
Origin_x=GetExprValue(tmp);
MatchToken(COMMA, ",");
tmp = Expression(); TreeTrace(tmp); //先序打印语法分析树;
Origin_y=GetExprValue(tmp);
MatchToken(R_BRACKET, ")");
}
void MyParser::ScaleStatement() {
//ScaleStatment → SCALE IS
// L_BRACKET Expression COMMA Expression R_BRACKET
TreeNode tmp = NULL;
MatchToken(SCALE, "SCALE");
MatchToken(IS, "IS");
MatchToken(L_BRACKET, "(");
tmp = Expression(); TreeTrace(tmp);
Scale_x=GetExprValue(tmp);
MatchToken(COMMA, ",");
tmp = Expression(); TreeTrace(tmp);
Scale_y=GetExprValue(tmp);
MatchToken(R_BRACKET, ")");
}
void MyParser::RotStatement() {
//RotStatment → ROT IS Expression
TreeNode tmp = NULL;
MatchToken(ROT, "ROT");
MatchToken(IS, "IS");
tmp = Expression(); TreeTrace(tmp);
Rot_angle=GetExprValue(tmp);
}
void MyParser::ForStatement() {
//ForStatement ->FOR T FROM Expression TO Expression STEP Expression
// DRAW (Expression,Expression)
TreeNode StartPtr,EndPtr,StepPtr,XPtr,YPtr;
StartPtr=EndPtr=StepPtr=XPtr=YPtr=NULL;///起点,终点,步长横,纵坐标表达式的语法树
MatchToken(FOR, "FOR");
MatchToken(T, "T");
MatchToken(FROM, "FROM");
StartPtr = Expression(); TreeTrace(StartPtr);
Start=GetExprValue(StartPtr);
MatchToken(TO, "TO");
EndPtr = Expression(); TreeTrace(EndPtr);
End=GetExprValue(EndPtr);
MatchToken(STEP, "STEP");
StepPtr = Expression(); TreeTrace(StepPtr);
Step=GetExprValue(StepPtr);
MatchToken(DRAW, "DRAW");
MatchToken(L_BRACKET, "(");
XPtr = Expression(); TreeTrace(XPtr);
MatchToken(COMMA, ",");
YPtr = Expression(); TreeTrace(YPtr);
MatchToken(R_BRACKET, ")");
//DrawLoop(Start,End,Step,x_ptr,y_ptr);
}
//表达式的语法树的递归构造
// Expression -> Term { (PLUS | MINUS) Term }
// Builds a left-associative tree: each new operator node takes the tree built
// so far as its left operand.
TreeNode MyParser::Expression() {
    TreeNode tree = Term();
    for (;;) {
        const Token_Type op = token.type;
        if (op != PLUS && op != MINUS) {
            break;
        }
        MatchToken(op);
        TreeNode rhs = Term();
        tree = MakeTreeNode(op, tree, rhs);
    }
    return tree;
}
// Term -> Factor { (MUL | DIV) Factor }
// Builds a left-associative tree: each new operator node takes the tree built
// so far as its left operand.
TreeNode MyParser::Term() {
    TreeNode tree = Factor();
    for (;;) {
        const Token_Type op = token.type;
        if (op != MUL && op != DIV) {
            break;
        }
        MatchToken(op);
        TreeNode rhs = Factor();
        tree = MakeTreeNode(op, tree, rhs);
    }
    return tree;
}
// Factor -> (PLUS | MINUS) Factor | Component
// Unary plus adds no node: the operand's tree is returned unchanged.
// Unary minus becomes the binary subtraction 0.0 - operand.
TreeNode MyParser::Factor() {
    switch (token.type) {
    case PLUS:
        MatchToken(PLUS);
        return Factor();
    case MINUS: {
        MatchToken(MINUS);
        TreeNode operand = Factor();
        // constant 0.0 serves as the left operand of the subtraction
        TreeNode zero = MakeTreeNode(CONST_ID, 0.0);
        return MakeTreeNode(MINUS, zero, operand);
    }
    default:
        return Component();
    }
}
// Component -> Atom [ POWER Component ]
// The exponent is parsed by a recursive call, which makes POWER
// right-associative.
TreeNode MyParser::Component() {
    TreeNode base = Atom();
    if (token.type != POWER) {
        return base;
    }
    MatchToken(POWER);
    TreeNode exponent = Component();
    return MakeTreeNode(POWER, base, exponent);
}
// Atom -> CONST_ID
//       | T
//       | FUNC L_BRACKET Expression R_BRACKET
//       | L_BRACKET Expression R_BRACKET
// Parses a constant, the parameter T, a function call or a parenthesised
// expression. Any other token is reported through SyntaxError(2); the result
// is then NULL if that call returns.
TreeNode MyParser::Atom() {
    // Copy the token on entry: its value / func_ptr are read after MatchToken
    // has been called.
    Token first = token;
    TreeNode result = NULL;

    if (first.type == CONST_ID) {
        MatchToken(CONST_ID);
        result = MakeTreeNode(CONST_ID, first.value);
    } else if (first.type == T) {
        MatchToken(T);
        result = MakeTreeNode(T);
    } else if (first.type == FUNC) {
        MatchToken(FUNC);
        MatchToken(L_BRACKET);
        TreeNode argument = Expression();
        result = MakeTreeNode(FUNC, first.func_ptr, argument);
        MatchToken(R_BRACKET);
    } else if (first.type == L_BRACKET) {
        MatchToken(L_BRACKET);
        result = Expression();
        MatchToken(R_BRACKET);
    } else {
        SyntaxError(2);
    }
    return result;
}
获取表达式(以语法分析树的形式表示)的值
// Recursively evaluates the expression tree rooted at `root`.
// Returns 0.0 for a null tree and for any node kind not handled below.
// NOTE(review): class MyParser declares GetExprValue as a member, but this
// definition is a free function (no MyParser:: qualifier) -- confirm which
// one is intended.
double GetExprValue(TreeNode root){
    if (!root) {
        return 0.0;
    }

    // Leaves, function calls and unknown kinds are resolved first.
    switch (root->OpCode) {
    case CONST_ID:
        return root->content.CaseConst;
    case T:
        return *(root->content.CasePara);
    case FUNC: {
        const double argument = GetExprValue(root->content.CaseFunc.child);
        return (*root->content.CaseFunc.mathfuncptr)(argument);
    }
    case PLUS:
    case MINUS:
    case MUL:
    case DIV:
    case POWER:
        break;              // binary operators are handled below
    default:
        return 0.0;
    }

    // Binary operator: evaluate both operands, then combine them.
    const double lhs = GetExprValue(root->content.CaseOp.left);
    const double rhs = GetExprValue(root->content.CaseOp.right);
    switch (root->OpCode) {
    case PLUS:
        return lhs + rhs;
    case MINUS:
        return lhs - rhs;
    case MUL:
        return lhs * rhs;
    case DIV:
        return lhs / rhs;
    default:                // POWER
        return pow(lhs, rhs);
    }
}
用语法分析模块传来的横纵坐标平移量,横纵坐标比和旋转角度来初始化坐标系
def set(Origin_x=0.0, Origin_y=0.0, Scale_x=1.0, Scale_y=1.0, Rot_angle=0.0):
    """Store the coordinate-system settings on Painter.

    Origin_x / Origin_y are written to Painter.orx / Painter.ory,
    Scale_x / Scale_y to Painter.scx / Painter.scy, and Rot_angle to
    Painter.ang.
    """
    Painter.orx, Painter.ory = Origin_x, Origin_y
    Painter.scx, Painter.scy = Scale_x, Scale_y
    Painter.ang = Rot_angle
画点,包含坐标变换,比例变换和旋转变换等
def paint(T_start, T_end, T_step, Point_x, Point_y):
    """Compute the points of one FOR-DRAW statement and append them to Painter.Points.

    T runs from T_start up to and including T_end in increments of T_step.
    For every value, ExpNode.T_value is set, both coordinate expressions are
    evaluated through getValue(), and the point is scaled, rotated and
    translated with the settings stored on Painter before being appended to
    Painter.Points['X'] and Painter.Points['Y'].

    Parameters:
        T_start, T_end, T_step: range of the parameter T.
        Point_x, Point_y: objects with a getValue() method giving the x / y
            coordinate for the current ExpNode.T_value.

    Raises:
        ValueError: if the loop would run (T_start <= T_end) but T_step is not
            positive, since T would never pass T_end.
    """
    if T_start <= T_end and T_step <= 0:
        raise ValueError("T_step must be positive, got %r" % (T_step,))

    T_value = T_start
    while T_value <= T_end:
        ExpNode.T_value = T_value
        x = Point_x.getValue()
        y = Point_y.getValue()
        # scaling
        x, y = x * Painter.scx, y * Painter.scy
        # rotation; the parentheses keep the two-line tuple a single assignment
        cos_a = math.cos(Painter.ang)
        sin_a = math.sin(Painter.ang)
        x, y = (x * cos_a + y * sin_a,
                y * cos_a - x * sin_a)
        # translation
        x, y = x + Painter.orx, y + Painter.ory
        Painter.Points['X'].append(x)
        Painter.Points['Y'].append(y)
        T_value += T_step
测试文件 2.txt
词法分析器
语法分析器
语义分析器(该文件是另一个测试文件)