成功解决了字节码指令生成的问题,关键是掌握以下两条原则:
1、数值常量expr100一律是MovImm #imm, R0,但把要不要执行Push R0推迟到expr20-60里做判断;
2、约定任何表达式生成的指令执行完毕后,其值都保存在R0里;需要执行Push R0,当且仅当:
此子表达式出现在二元运算的左侧,且右侧需要递归求值。(注意:如果右侧只是一个数值常量,可以直接MovImm #right_imm, R1,而不需要对左侧的结果执行Push R0)
/**
 * Calculator that parses an infix token stream by recursive descent,
 * evaluating it directly while also emitting bytecode through an Assembler.
 *
 * Fields initialised here:
 *  - tokens: the infix token stream, parentheses included. Tokens are "(" / ")"
 *    strings, Number values, or operator strings.
 *  - tokens_scan_index: index of the next token the parser will read.
 *  - saved_tokens_scan_index_stack: saved scan positions used for backtracking.
 *  - value_buffer: digit/"." button presses not yet flushed into a Number token.
 *  - assember: the Assembler instance that collects emitted instructions
 *    (the property name keeps its historical spelling because other code reads it).
 */
function AdvancedCalculator() {
  // Raw input stream for the syntax analysis.
  this.tokens = [];
  this.tokens_scan_index = 0;
  this.saved_tokens_scan_index_stack = [];

  // Characters of the numeric literal currently being typed.
  this.value_buffer = [];

  this.assember = new Assembler();
}
/**
 * Methods of AdvancedCalculator.
 *
 * The parser is a recursive-descent evaluator over `this.tokens`. Every
 * `evalExprNN` method returns a tuple `[success, value]`; on failure the tuple
 * is `[false]`. While evaluating, each method also emits bytecode through
 * `this.assember.emitInstruction`, following one convention: after the
 * instructions of any sub-expression have run, its value is in R0. A binary
 * operation therefore emits `Push R0` before its right operand and
 * `Mov R0 R1; Pop R0; CallPrimitiveFunction <op>` after it.
 *
 * Grammar, from lowest to highest precedence:
 *   expr20  := expr40 ( ('+' | '-') expr40 )*      left associative
 *   expr40  := expr50 ( ('*' | '/') expr50 )*      left associative
 *   expr50  := expr60 ( Pow expr50 )?              right associative
 *   expr60  := unaryOperator expr80 | expr80
 *   expr80  := '(' expr20 ')' | expr100
 *   expr100 := Number
 *
 * External names used: `assert`, `alert` and `BytecodeIntercepter` are defined
 * outside this block.
 */
AdvancedCalculator.prototype = {
  // Multi-character operators are represented by dedicated string tokens.
  SQRT: "Sqrt",
  SIN: "Sin",
  COS: "Cos",
  TAN: "Tan",
  COT: "Cot",
  LOG: "Log", // base-10 logarithm
  LN: "Ln", // natural logarithm
  POW: "Pow", // x^y
  PI: Math.PI, // a numeric constant, not an operator

  /**
   * Returns the one-argument JavaScript function implementing a unary operator token.
   * @param {string} opToken one of SQRT, SIN, COS, TAN, COT, LOG, LN
   * @returns {function(number): number}
   * @throws {string} when the token is not a known unary operator
   */
  mapUnaryOperator2UnaryFunction: function (opToken) {
    if (opToken === this.SQRT) return Math.sqrt;
    if (opToken === this.SIN) return Math.sin;
    if (opToken === this.COS) return Math.cos;
    if (opToken === this.TAN) return Math.tan;
    if (opToken === this.COT) {
      return function (a) {
        return 1 / Math.tan(a);
      };
    }
    if (opToken === this.LOG) return Math.log10;
    if (opToken === this.LN) return Math.log;
    throw "未识别的一元运算符: " + opToken;
  },

  /**
   * Returns the two-argument JavaScript function implementing a binary operator token.
   * @param {string} opToken POW, "+", "-", "*" or "/"
   * @returns {function(number, number): number}
   * @throws {string} when the token is not a known binary operator
   */
  mapBinaryOperator2BinaryFunction: function (opToken) {
    if (opToken === this.POW) return Math.pow;
    if (opToken === "+") {
      return function (a, b) {
        return a + b;
      };
    }
    if (opToken === "-") {
      return function (a, b) {
        return a - b;
      };
    }
    if (opToken === "*") {
      return function (a, b) {
        return a * b;
      };
    }
    if (opToken === "/") {
      return function (a, b) {
        return a / b;
      };
    }
    throw "未识别的二元运算符: " + opToken;
  },

  /**
   * Consumes and returns the next token.
   * @returns {*} the token, or null when the stream is exhausted
   */
  nextToken: function () {
    if (!this.hasMoreTokens()) {
      return null;
    }
    return this.tokens[this.tokens_scan_index++];
  },

  /**
   * Returns the next token without consuming it.
   * @returns {*} the token, or null when the stream is exhausted
   */
  peekToken: function () {
    return this.hasMoreTokens() ? this.tokens[this.tokens_scan_index] : null;
  },

  /** @returns {boolean} true while the scan index points at a token */
  hasMoreTokens: function () {
    return this.tokens_scan_index >= 0 && this.tokens_scan_index < this.tokens.length;
  },

  /**
   * Saves the current scan position on the backtracking stack.
   * @returns {number} the stack slot the position was stored in
   */
  pushTokenScanIndex: function () {
    assert(this.tokens_scan_index >= 0 && this.tokens_scan_index < this.tokens.length);
    this.saved_tokens_scan_index_stack.push(this.tokens_scan_index);
    return this.saved_tokens_scan_index_stack.length - 1;
  },

  /**
   * Unwinds the backtracking stack down to `stack_index` and restores the
   * position that was saved in that slot.
   * @param {number} stack_index a slot previously returned by pushTokenScanIndex
   */
  popTokenScanIndexAt: function (stack_index) {
    assert(stack_index >= 0 && stack_index < this.saved_tokens_scan_index_stack.length);
    while (this.saved_tokens_scan_index_stack.length > stack_index) {
      this.tokens_scan_index = this.saved_tokens_scan_index_stack.pop();
    }
  },

  /** Restores the most recently saved scan position. */
  popTokenScanIndex: function () {
    this.tokens_scan_index = this.saved_tokens_scan_index_stack.pop();
  },

  /** Drops the most recently saved scan position without restoring it. */
  discardLastTokenScanIndex: function () {
    this.saved_tokens_scan_index_stack.pop();
  },

  /**
   * @param {*} token
   * @returns {boolean} true for every token mapUnaryOperator2UnaryFunction accepts
   */
  isUnaryOperator: function (token) {
    // SQRT was missing from this list, which made the Sqrt button unusable.
    return (
      token === this.SQRT ||
      token === this.SIN ||
      token === this.COS ||
      token === this.TAN ||
      token === this.COT ||
      token === this.LOG ||
      token === this.LN
    );
  },

  /** @returns {boolean} true for binary operators written as named functions (Pow) */
  isBinaryFunctionToken: function (token) {
    return token === this.POW;
  },

  /** @returns {boolean} true for every infix binary operator, Pow included */
  isBinaryOperator: function (token) {
    return (
      token === "+" ||
      token === "-" ||
      token === "*" ||
      token === "/" ||
      this.isBinaryFunctionToken(token)
    );
  },

  /**
   * Classifies a token by arity.
   * @returns {number} 1 for a unary operator, 2 for a binary operator, 0 otherwise
   */
  isOperator: function (token) {
    if (this.isUnaryOperator(token)) return 1;
    if (this.isBinaryOperator(token)) return 2;
    return 0;
  },

  /** Appends a token to the input stream. */
  emitToken: function (token) {
    this.tokens.push(token);
  },

  /**
   * Flushes the buffered digit characters, if any, into a single Number token.
   */
  emitValueTokenIfAny: function () {
    if (this.value_buffer.length === 0) {
      return;
    }
    this.emitToken(Number(this.value_buffer.join("")));
    this.value_buffer = [];
  },

  /**
   * Feeds one button press into the tokenizer. Parentheses and operators end
   * the numeric literal being typed and become tokens themselves; anything
   * else (digits and ".") is buffered as part of the current literal.
   * @param {*} btn the pressed button
   */
  emitButton: function (btn) {
    if (btn === "(" || btn === ")" || this.isOperator(btn)) {
      this.emitValueTokenIfAny();
      this.emitToken(btn);
    } else {
      this.value_buffer.push(btn);
    }
  },

  /** Feeds a sequence of button presses, in order. */
  emitButtons: function (btns) {
    for (var i = 0; i < btns.length; ++i) {
      this.emitButton(btns[i]);
    }
  },

  /**
   * Moves every item of `source` to the end of `target`, leaving `source` empty.
   */
  concat: function (target, source) {
    while (source.length > 0) {
      target.push(source.shift());
    }
  },

  /**
   * Parses and evaluates a full expression starting at the current scan position.
   * @returns {Array} `[true, value]` on success, `[false]` otherwise
   */
  evalExpr: function () {
    return this.evalExpr20();
  },

  /**
   * expr100: a numeric literal. Emits `MovImm <value> R0`. No Push is emitted
   * here; whether the value must be pushed is decided by the enclosing binary
   * operation.
   */
  evalExpr100: function () {
    if (!this.hasMoreTokens()) return [false];
    var token = this.peekToken();
    if (token == null) throw "流非正常结束,此处应有一数值value!";
    assert(typeof token === "number");
    if (typeof token !== "number") {
      return [false]; // only reachable when `assert` does not throw
    }
    this.nextToken();
    this.assember.emitInstruction({ type: "MovImm", arg: token, arg1: "R0" });
    return [true, token];
  },

  /**
   * expr80: a parenthesised expression or a numeric literal. Parentheses only
   * change precedence, so no instruction is emitted for them.
   */
  evalExpr80: function () {
    if (!this.hasMoreTokens()) return [false];
    var token = this.peekToken();
    if (token == null) {
      return [false]; // was a bare `false`, unlike every other failure path
    }
    if (token !== "(") {
      return this.evalExpr100();
    }
    this.nextToken(); // consume "("
    var inner = this.evalExpr();
    if (!inner[0]) throw "TODO: fixme";
    var closing = this.nextToken();
    if (closing == null) throw "流异常结束:expect a )";
    assert(closing === ")");
    return inner;
  },

  /**
   * expr60: an optional unary operator applied to an expr80. Nested unary
   * operators without parentheses (e.g. `Sin Sin 1`) are not supported.
   * NOTE(review): `CallPrimitiveFunction Sqrt` is now emitted for SQRT; confirm
   * the bytecode interpreter implements that primitive.
   */
  evalExpr60: function () {
    if (!this.hasMoreTokens()) return [false];
    var token = this.peekToken();
    if (token == null) {
      throw "流异常结束:期望一个Expr60";
    }
    if (!this.isUnaryOperator(token)) {
      return this.evalExpr80();
    }
    this.nextToken(); // consume the operator
    var operand = this.evalExpr80();
    if (!operand[0]) throw "非法表达式!";
    var unaryFunc = this.mapUnaryOperator2UnaryFunction(token);
    // The operand is already in R0, which is where the primitive reads it.
    this.assember.emitInstruction({ type: "CallPrimitiveFunction", arg: token });
    return [true, unaryFunc(operand[1])];
  },

  /**
   * expr50: binary functions such as Pow. Right associativity comes from the
   * recursion on the right operand, so a chain `a Pow b Pow c` is fully
   * consumed by one recursive call and no loop is needed. The previous `while`
   * never re-read the look-ahead token and re-entered with the stale operator,
   * which made inputs like `2 Pow 3 * 4` throw.
   */
  evalExpr50: function () {
    if (!this.hasMoreTokens()) return [false];
    var left = this.evalExpr60();
    if (!left[0]) return [false];

    var opToken = this.peekToken();
    if (!this.isBinaryFunctionToken(opToken)) {
      return [true, left[1]];
    }
    this.nextToken(); // consume the operator

    // Save the left value before the right operand overwrites R0.
    this.assember.emitInstruction({ type: "Push", arg: "R0" });
    var right = this.evalExpr50();
    if (!right[0]) {
      // The operator matched but no valid right operand follows.
      throw "Input Invalid";
    }
    var binFunc = this.mapBinaryOperator2BinaryFunction(opToken);
    var value = binFunc(left[1], right[1]);
    // Right result R0 -> R1, restore the left value into R0, then apply.
    this.assember.emitInstruction({ type: "Mov", arg: "R0", arg1: "R1" });
    this.assember.emitInstruction({ type: "Pop", arg: "R0" });
    this.assember.emitInstruction({ type: "CallPrimitiveFunction", arg: opToken });
    return [true, value];
  },

  /**
   * expr40: multiplication and division, both left associative.
   * @throws {string} when tokens remain but no expr50 can be parsed, or when an
   *   operator is not followed by a valid operand
   */
  evalExpr40: function () {
    if (!this.hasMoreTokens()) return [false];
    var startIndex = this.tokens_scan_index;
    var first = this.evalExpr50();
    if (!first[0]) {
      this.tokens_scan_index = startIndex; // rewind before reporting the error
      throw "Invalid Input: expect expr40 here";
    }

    var value = first[1];
    var opToken = this.peekToken();
    while (opToken === "*" || opToken === "/") {
      this.nextToken(); // consume the operator
      this.assember.emitInstruction({ type: "Push", arg: "R0" });
      var right = this.evalExpr50();
      if (!right[0]) {
        throw "Input Invalid";
      }
      if (opToken === "*") {
        value *= right[1];
      } else {
        value /= right[1];
      }
      this.assember.emitInstruction({ type: "Mov", arg: "R0", arg1: "R1" });
      this.assember.emitInstruction({ type: "Pop", arg: "R0" });
      this.assember.emitInstruction({ type: "CallPrimitiveFunction", arg: opToken });
      opToken = this.peekToken();
    }
    return [true, value];
  },

  /**
   * expr20: addition and subtraction, both left associative (subtraction must
   * be; addition is kept the same for uniformity).
   * @returns {Array} `[true, value]`, or `[false]` when the stream is empty or
   *   an operator is not followed by an operand
   */
  evalExpr20: function () {
    if (!this.hasMoreTokens()) return [false];
    var first = this.evalExpr40();
    if (!first[0]) {
      return [false];
    }

    var value = first[1];
    var opToken = this.peekToken();
    while (opToken === "+" || opToken === "-") {
      this.nextToken(); // consume the operator
      this.assember.emitInstruction({ type: "Push", arg: "R0" });
      var right = this.evalExpr40();
      if (!right[0]) {
        return [false]; // e.g. "1+2-": operator without a right operand
      }
      if (opToken === "+") {
        value += right[1];
      } else {
        value -= right[1];
      }
      this.assember.emitInstruction({ type: "Mov", arg: "R0", arg1: "R1" });
      this.assember.emitInstruction({ type: "Pop", arg: "R0" });
      this.assember.emitInstruction({ type: "CallPrimitiveFunction", arg: opToken });
      opToken = this.peekToken();
    }
    return [true, value];
  },

  /**
   * Evaluates everything entered so far. Besides the direct evaluation it shows
   * the generated bytecode and the result of running it through
   * BytecodeIntercepter via `alert`, so the two evaluators can be compared.
   * @returns {number|undefined} the directly evaluated value; undefined when
   *   parsing failed
   */
  calc: function () {
    // Flush a trailing numeric literal that no operator has terminated yet.
    this.emitValueTokenIfAny();
    var result = this.evalExpr(); // [success, value]

    alert(this.assember.toString());
    var intercepter = new BytecodeIntercepter();
    var interceptEvalResult = intercepter.eval(this.assember.getResult());
    alert("字节码解释器求值结果=" + interceptEvalResult + " \r\n直接递归下降解释执行结果=" + result);

    return result[1];
  }
}
parser的代码目前同时做2件事情:(1)老的直接在递归下降解析过程中求值,(2)新的通过Assembler生成字节码指令。
测试代码:
// Test case 7: a sum/difference of three unary function calls, each applied to
// a parenthesised binary expression.
alert("7: sin(1+2)+cos(3-4)-tan(5*6)");
var ac = new AdvancedCalculator();
// The button sequence is assembled from one group per term of the expression.
ac.emitButtons(
  [ac.SIN, "(", "1", "+", "2", ")"].concat(
    ["+", ac.COS, "(", "3", "-", "4", ")"],
    ["-", ac.TAN, "(", "5", "*", "6", ")"]
  )
);
var result = ac.calc();
assertEquals(
  result,
  Math.sin(1 + 2) + Math.cos(3 - 4) - Math.tan(5 * 6)
);
MovImm 1 R0
Push R0
MovImm 2 R0
Mov R0 R1
Pop R0
CallPrimitiveFunction +
CallPrimitiveFunction Sin
Push R0
MovImm 3 R0
Push R0
MovImm 4 R0
Mov R0 R1
Pop R0
CallPrimitiveFunction -
CallPrimitiveFunction Cos
Mov R0 R1
Pop R0
CallPrimitiveFunction +
Push R0
MovImm 5 R0
Push R0
MovImm 6 R0
Mov R0 R1
Pop R0
CallPrimitiveFunction *
CallPrimitiveFunction Tan
Mov R0 R1
Pop R0
CallPrimitiveFunction -
字节码解释器求值结果=7.086753510574282
直接递归下降解释执行结果=true,7.086753510574282
下一步工作:编写一个可视化界面?将JS代码格式化一下,然后变量命名再重构一下?加上AST生成和转换成JS运算表达式的支持?