使用JavaScript实现一个“字节码解释器”,并用它重新实现JS科学计算器的后端(后续4)

成功解决了字节码指令生成的问题,关键是掌握以下两条原则:

1、数值常量expr100一律是MovImm #imm, R0,但把要不要执行Push R0推迟到expr20-60里做判断;
2、约定任何表达式生成的指令执行完毕后,其值都保存在R0里;需要Push R0当且仅当:

     此子表达式出现在二元运算的左侧,且右侧需要递归求值。(注意:右侧如果只是一个数值常量,本可以直接MovImm #right_imm, R1,从而省去对左侧结果的Push R0;下面的代码尚未做这一优化,遇到二元运算时总是先Push R0。)

/**
 * Calculator front end: collects button presses into a token stream and
 * owns the Assembler that receives the generated bytecode instructions.
 */
function AdvancedCalculator(){
   // Parser input: an infix token stream holding three kinds of items --
   // the strings "(" and ")", Number values, and operator tokens.
   this.tokens = [];
   // Index of the next token the parser will read from this.tokens.
   this.tokens_scan_index = 0;
   // Saved scan positions, used by the parser to undo a lookahead.
   this.saved_tokens_scan_index_stack = [];
   // Digit / decimal-point buttons collected so far for the number being typed.
   this.value_buffer = [];
   // Receiver of the emitted bytecode instructions (Assembler is defined
   // outside this block).
   this.assember = new Assembler();
}

/**
 * Prototype of AdvancedCalculator: a button-driven tokenizer plus a
 * recursive-descent parser that does two things in one pass:
 *   (1) evaluates the expression directly while parsing, and
 *   (2) emits bytecode instructions through this.assember.
 *
 * Grammar, from lowest to highest precedence:
 *   expr20  := expr40 ( ('+' | '-') expr40 )*     left associative
 *   expr40  := expr50 ( ('*' | '/') expr50 )*     left associative
 *   expr50  := expr60 ( Pow expr50 )?             right associative
 *   expr60  := unaryOp expr80 | expr80
 *   expr80  := '(' expr20 ')' | expr100
 *   expr100 := number
 *
 * Code-generation convention: the instructions emitted for any
 * (sub)expression leave its value in R0. For a binary operation the left
 * value is pushed, the right side is generated (value in R0), moved to R1,
 * the left value is popped back into R0 and the primitive is called.
 *
 * Every evalExprNN method returns [true, value] on success and [false]
 * when nothing could be parsed; syntax errors are thrown as strings.
 */
AdvancedCalculator.prototype = {
   // Operator tokens. Each one is a plain string that doubles as the
   // argument of the CallPrimitiveFunction instruction.
   SQRT: "Sqrt",
   SIN: "Sin",
   COS: "Cos",
   TAN: "Tan",
   COT: "Cot",
   LOG: "Log", // base 10
   LN: "Ln",   // base e
   POW: "Pow", // x^y
   PI: Math.PI, // a numeric constant, not an operator

   /**
    * Maps a unary operator token to the JavaScript function computing it.
    * @param {string} opToken one of SQRT, SIN, COS, TAN, COT, LOG, LN
    * @returns {function(number): number}
    * @throws {string} when the token is not a known unary operator
    */
   mapUnaryOperator2UnaryFunction: function(opToken){
      switch(opToken){
         case this.SQRT: return Math.sqrt;
         case this.SIN: return Math.sin;
         case this.COS: return Math.cos;
         case this.TAN: return Math.tan;
         case this.COT: return function(a){ return 1/Math.tan(a); };
         case this.LOG: return Math.log10;
         case this.LN: return Math.log;
         default: throw "未识别的一元运算符: "+opToken;
      }
   },

   /**
    * Maps a binary operator token to the JavaScript function computing it.
    * @param {string} opToken POW, "+", "-", "*" or "/"
    * @returns {function(number, number): number}
    * @throws {string} when the token is not a known binary operator
    */
   mapBinaryOperator2BinaryFunction: function(opToken){
      switch(opToken){
         case this.POW: return Math.pow;
         case "+": return function(a,b){ return a+b; };
         case "-": return function(a,b){ return a-b; };
         case "*": return function(a,b){ return a*b; };
         case "/": return function(a,b){ return a/b; };
         default: throw "未识别的二元运算符: "+opToken;
      }
   },

   /**
    * Reads the token at the scan position and advances past it.
    * @returns {*} the token, or null when the stream is exhausted
    */
   nextToken: function(){
      if(!this.hasMoreTokens())
         return null;
      return this.tokens[this.tokens_scan_index++];
   },

   /** @returns {boolean} true while the scan position is inside the token stream */
   hasMoreTokens: function(){
      return this.tokens_scan_index>=0 && this.tokens_scan_index<this.tokens.length;
   },

   /**
    * Saves the current scan position so that it can be restored later.
    * Must only be called while tokens remain (checked with assert).
    * @returns {number} index of the saved entry inside the save stack
    */
   pushTokenScanIndex: function(){
      assert( this.tokens_scan_index>=0 && this.tokens_scan_index<this.tokens.length );
      this.saved_tokens_scan_index_stack.push(this.tokens_scan_index);
      return this.saved_tokens_scan_index_stack.length-1;
   },

   /**
    * Unwinds the save stack down to stack_index and restores the scan
    * position that was saved at that entry.
    * @param {number} stack_index value previously returned by pushTokenScanIndex
    */
   popTokenScanIndexAt: function(stack_index){
      assert( stack_index>=0 && stack_index<this.saved_tokens_scan_index_stack.length );
      while(this.saved_tokens_scan_index_stack.length>stack_index)
         this.tokens_scan_index = this.saved_tokens_scan_index_stack.pop();
   },

   /** Restores the most recently saved scan position and removes it from the save stack. */
   popTokenScanIndex: function(){
      this.tokens_scan_index = this.saved_tokens_scan_index_stack.pop();
   },

   /** Drops the most recently saved scan position without restoring it. */
   discardLastTokenScanIndex: function(){
      this.saved_tokens_scan_index_stack.pop();
   },

   /**
    * Consumes the next token only when accept(token) is truthy.
    * The scan position is saved before reading and either discarded (token
    * accepted) or restored (token rejected), so the save stack is always
    * left exactly as it was found.
    * @param {function(*): boolean} accept predicate, invoked with this calculator as `this`
    * @returns {*} the consumed token, or null when the stream is exhausted
    *              or the token was rejected
    */
   tryConsume: function(accept){
      if(!this.hasMoreTokens())
         return null;
      this.pushTokenScanIndex();
      var token = this.nextToken();
      if(accept.call(this, token)){
         this.discardLastTokenScanIndex();
         return token;
      }
      this.popTokenScanIndex();
      return null;
   },

   /**
    * @param {*} token
    * @returns {boolean} true for every unary function token. SQRT is
    *   included: it is supported by mapUnaryOperator2UnaryFunction, and
    *   without it the Sqrt button would be buffered as part of a number.
    */
   isUnaryOperator: function(token){
      return token===this.SQRT || token===this.SIN || token===this.COS ||
             token===this.TAN || token===this.COT || token===this.LOG ||
             token===this.LN;
   },

   /** @returns {boolean} true for the special binary function token (POW) */
   isBinaryFunctionToken: function(token){
      return token===this.POW;
   },

   /** @returns {boolean} true for "*" and "/" */
   isMultiplicativeOperator: function(token){
      return token==="*" || token==="/";
   },

   /** @returns {boolean} true for "+" and "-" */
   isAdditiveOperator: function(token){
      return token==="+" || token==="-";
   },

   /** @returns {boolean} true for every binary infix operator, binary functions included */
   isBinaryOperator: function(token){
      return this.isAdditiveOperator(token) || this.isMultiplicativeOperator(token) ||
             this.isBinaryFunctionToken(token);
   },

   /**
    * Classifies a token by arity.
    * @returns {number} 1 for a unary operator, 2 for a binary operator, 0 otherwise
    */
   isOperator: function(token){
      if(this.isUnaryOperator(token))
         return 1;
      if(this.isBinaryOperator(token))
         return 2;
      return 0;
   },

   /** Appends one token to the parser input stream. */
   emitToken: function(token){
      this.tokens.push(token);
   },

   /**
    * Converts the buffered digit buttons (if any) into a single Number
    * token, appends it to the stream and clears the buffer.
    */
   emitValueTokenIfAny: function(){
      if(this.value_buffer.length===0)
         return;
      this.emitToken(Number(this.value_buffer.join('')));
      this.value_buffer = [];
   },

   /**
    * Feeds one button press into the tokenizer. Parentheses and operators
    * first flush the pending number and are then emitted as tokens; every
    * other button (0-9 and ".") is buffered as part of the current number.
    * @param {*} btn the pressed button
    */
   emitButton: function(btn){
      if(btn==="(" || btn===")" || this.isOperator(btn)!==0){
         this.emitValueTokenIfAny();
         this.emitToken(btn);
      }else{
         this.value_buffer.push(btn);
      }
   },

   /** Feeds a sequence of button presses, in order, into the tokenizer. */
   emitButtons: function(btns){
      for(var i=0; i<btns.length; ++i)
         this.emitButton(btns[i]);
   },

   /**
    * Moves every item of source to the end of target, leaving source empty.
    * @param {Array} target receives the items, order preserved
    * @param {Array} source is drained
    */
   concat: function(target, source){
      // Copy first and truncate once; repeatedly shifting the head of the
      // array would make the drain quadratic.
      for(var i=0; i<source.length; ++i)
         target.push(source[i]);
      source.length = 0;
   },

   /**
    * Emits the common tail of every binary operation. It expects the left
    * operand on the VM stack and the right operand in R0.
    * @param {string} opToken primitive to call
    */
   emitBinaryOperation: function(opToken){
      // Move the right-hand result out of the way ...
      this.assember.emitInstruction({type: "Mov", arg: "R0", arg1: "R1"});
      // ... bring back the left-hand value pushed earlier ...
      this.assember.emitInstruction({type: "Pop", arg: "R0"});
      // ... and apply the primitive to R0 and R1.
      this.assember.emitInstruction({type: "CallPrimitiveFunction", arg: opToken});
   },

   /** Parses a complete expression, starting at the lowest precedence level. */
   evalExpr: function(){
      return this.evalExpr20();
   },

   /**
    * expr100 := number
    * Emits "MovImm value, R0". Whether that value has to be pushed is
    * decided by the enclosing binary rule, not here.
    * @returns {Array} [true, value], or [false] when the next token is not
    *   a number (the scan position is then left unchanged)
    */
   evalExpr100: function(){
      var value = this.tryConsume(function(token){ return typeof token==="number"; });
      if(value===null)
         return [false];
      this.assember.emitInstruction({type: "MovImm", arg: value, arg1: "R0"});
      return [true, value];
   },

   /**
    * expr80 := '(' expr20 ')' | expr100
    * Parentheses only change precedence; they emit no instructions of
    * their own.
    * @returns {Array} [true, value] or [false]
    * @throws {string} when "(" is not followed by an expression or the
    *   closing ")" is missing
    */
   evalExpr80: function(){
      var open = this.tryConsume(function(token){ return token==="("; });
      if(open===null)
         return this.evalExpr100();
      var inner = this.evalExpr();
      if(!inner[0])
         throw "非法表达式:( 之后缺少子表达式";
      var closing = this.nextToken();
      if(closing===null)
         throw "流异常结束:expect a )";
      if(closing!==")")
         throw "非法表达式:expect a ), got "+closing;
      return inner;
   },

   /**
    * expr60 := unaryOp expr80 | expr80
    * The operand's code leaves its value in R0, so applying a unary
    * function is a single CallPrimitiveFunction.
    * @returns {Array} [true, value] or [false]
    * @throws {string} when a unary operator is not followed by an operand
    */
   evalExpr60: function(){
      var unaryOp = this.tryConsume(this.isUnaryOperator);
      if(unaryOp===null)
         return this.evalExpr80();
      var operand = this.evalExpr80();
      if(!operand[0])
         throw "非法表达式!";
      var unaryFunc = this.mapUnaryOperator2UnaryFunction(unaryOp);
      this.assember.emitInstruction({type: "CallPrimitiveFunction", arg: unaryOp});
      return [true, unaryFunc(operand[1])];
   },

   /**
    * expr50 := expr60 ( Pow expr50 )?
    * Right associativity comes from the recursion on the right operand, so
    * a single optional match is sufficient. A loop that keeps testing the
    * already consumed operator would try to parse a further right operand
    * after inputs such as 2^3+4.
    * @returns {Array} [true, value] or [false]
    * @throws {string} "Input Invalid" when Pow has no right operand
    */
   evalExpr50: function(){
      var left = this.evalExpr60();
      if(!left[0])
         return [false];
      var powOp = this.tryConsume(this.isBinaryFunctionToken);
      if(powOp===null)
         return [true, left[1]];
      // Save the left value (in R0) before the right side overwrites R0.
      this.assember.emitInstruction({type: "Push", arg: "R0"});
      var right = this.evalExpr50();
      if(!right[0])
         throw "Input Invalid";
      var binFunc = this.mapBinaryOperator2BinaryFunction(powOp);
      this.emitBinaryOperation(powOp);
      return [true, binFunc(left[1], right[1])];
   },

   /**
    * expr40 := expr50 ( ('*' | '/') expr50 )*   -- left associative
    * @returns {Array} [true, value], or [false] when the stream is exhausted
    * @throws {string} when tokens remain but no operand can be parsed, or
    *   when an operator has no right operand
    */
   evalExpr40: function(){
      if(!this.hasMoreTokens())
         return [false];
      var first = this.evalExpr50();
      if(!first[0])
         throw "Invalid Input: expect expr40 here";
      var tmp_value = first[1];
      var op;
      while((op = this.tryConsume(this.isMultiplicativeOperator))!==null){
         // Save the running left value (in R0) before parsing the right side.
         this.assember.emitInstruction({type: "Push", arg: "R0"});
         var right = this.evalExpr50();
         if(!right[0])
            throw "Input Invalid";
         if(op==="*")
            tmp_value *= right[1];
         else
            tmp_value /= right[1];
         this.emitBinaryOperation(op);
      }
      return [true, tmp_value];
   },

   /**
    * expr20 := expr40 ( ('+' | '-') expr40 )*   -- left associative
    * @returns {Array} [true, value]; [false] when the stream is exhausted
    *   or an operator is the last token of the stream
    */
   evalExpr20: function(){
      var first = this.evalExpr40();
      if(!first[0])
         return [false];
      var tmp_value = first[1];
      var op;
      while((op = this.tryConsume(this.isAdditiveOperator))!==null){
         // Save the running left value (in R0) before parsing the right side.
         this.assember.emitInstruction({type: "Push", arg: "R0"});
         var right = this.evalExpr40();
         if(!right[0])
            return [false]; // e.g. "1+": always the array shape, never a bare false
         if(op==="+")
            tmp_value += right[1];
         else
            tmp_value -= right[1];
         this.emitBinaryOperation(op);
      }
      return [true, tmp_value];
   },

   /**
    * Flushes the pending number, parses and evaluates the whole token
    * stream, then shows the generated bytecode and the result obtained by
    * running it through BytecodeIntercepter next to the directly evaluated
    * result (both via alert).
    * @returns {number|undefined} the directly evaluated value; undefined
    *   when parsing did not succeed
    */
   calc: function(){
      // A trailing number is still sitting in value_buffer at this point.
      this.emitValueTokenIfAny();
      var result = this.evalExpr(); // [success, value]
      alert(this.assember.toString());
      var intercepter = new BytecodeIntercepter();
      var interceptEvalResult = intercepter.eval(this.assember.getResult());
      alert("字节码解释器求值结果="+interceptEvalResult+" \r\n直接递归下降解释执行结果="+result);
      return result[1];
   }
}

parser的代码目前同时做2件事情:(1)老的直接在递归下降解析过程中求值,(2)新的通过Assembler生成字节码指令。


测试代码:

// Test case 7: unary functions applied to parenthesised sub-expressions,
// combined with + and -.
alert("7: sin(1+2)+cos(3-4)-tan(5*6)");
var ac = new AdvancedCalculator();
// Feed the expression one button at a time; digits are passed as strings.
ac.emitButtons([ac.SIN, "(", "1", "+", "2", ")", "+", ac.COS, "(", "3", "-", "4", ")", "-", ac.TAN, "(", "5", "*", "6", ")"]);
var result = ac.calc();
// NOTE(review): assertEquals is defined outside this excerpt; presumably it
// compares its two arguments for equality -- confirm against its definition.
assertEquals(result, Math.sin(1+2)+Math.cos(3-4)-Math.tan(5*6));

成功输出:

MovImm 1 R0
Push R0
MovImm 2 R0
Mov R0 R1
Pop R0
CallPrimitiveFunction +
CallPrimitiveFunction Sin
Push R0
MovImm 3 R0
Push R0
MovImm 4 R0
Mov R0 R1
Pop R0
CallPrimitiveFunction -
CallPrimitiveFunction Cos
Mov R0 R1
Pop R0
CallPrimitiveFunction +
Push R0
MovImm 5 R0
Push R0
MovImm 6 R0
Mov R0 R1
Pop R0
CallPrimitiveFunction *
CallPrimitiveFunction Tan
Mov R0 R1
Pop R0
CallPrimitiveFunction -

字节码解释器求值结果=7.086753510574282
直接递归下降解释执行结果=true,7.086753510574282


下一步工作:编写一个可视化界面?将JS代码格式化一下,然后变量命名再重构一下?加上AST生成和转换成JS运算表达式的支持?


你可能感兴趣的:(JavaScript,字节码,编译器,解释器,指令生成)