公式解析器最终生成一个表达式对象，该表达式对象包含逆波兰表达式（后缀表达式），用于后续的求值操作。
Expression 表达式类
文件: FormulaParser.js
// class: Expression
// Implements an expression object: it holds the formula text, the scanning
// cursor, and the token list that the parser fills in (the final reverse
// Polish expression).
//
// arg: the formula content; null/undefined yields an empty formula.
//
// Fields written by the constructor:
//   Formula          - the text that is scanned by the parser.
//   OriginalFormula  - initially the same text as Formula.
//   OriginlFormula   - misspelled alias of OriginalFormula, kept only so that
//                      existing readers of the old name keep working.
//   CurIdx           - index of the next char to scan, starts at 0.
//   LastToken        - kind of the last parsed token, starts as "none".
//   Tokens           - the token list, starts empty.
function Expression(arg) {
    this.set_ClassName("Expression");

    // assign formula content; prefer the ToString() method used by the
    // tokens in this file when it is available and fall back to the
    // standard string conversion otherwise.
    var text = "";
    if (arg != null)
        text = (typeof arg.ToString == "function") ? arg.ToString() : String(arg);

    this.Formula = text;
    this.OriginalFormula = text;
    this.OriginlFormula = text; // legacy (misspelled) alias

    this.CurIdx = 0;
    this.LastToken = "none";
    this.Tokens = [];
};

Expression.prototype = {
    // indicates if the cursor reaches the end of formula.
    Eof: function() {
        return this.CurIdx >= this.Formula.length;
    },

    // returns the length of formula.
    Length: function() {
        return this.Formula.length;
    },

    // gets the char at pos ("" when pos is out of range).
    CharAt: function(pos) {
        return this.Formula.charAt(pos);
    },

    // gets the char at CurIdx position.
    CurChar: function() {
        return this.Formula.charAt(this.CurIdx);
    },

    // prints the tokens as one string; every token is followed by a ','.
    //   operand         -> its ToString() value
    //   unary operator  -> "[sign]"
    //   function        -> "sign()"
    //   binary operator -> "sign"
    PrintTokens: function() {
        var msg = "";
        for (var n = 0; n < this.Tokens.length; n++) {
            var token = this.Tokens[n];
            if (token.HasAncestor("OperandBase"))
                msg += token.ToString();            // output operand
            else if (token.IsUnary())
                msg += "[" + token.get_Sign() + "]"; // output unary operator
            else if (token.IsFunction())
                msg += token.get_Sign() + "()";      // output function
            else
                msg += token.get_Sign();             // output binary operator
            msg += ',';                              // print the separator
        }
        return msg;
    }
};
FormulaParser公式解析器类
文件:FormulaParser.js
// Class: FormulaParser
// facilitates the parsing of formula.
function FormulaParser() {
    this.set_ClassName("FormulaParser");
};

FormulaParser.prototype = {
    // parses a formula string and returns an Expression object.
    //
    // formula: the formula text; it must contain an '=' and everything after
    //          the first '=' is handed to the parser.
    // returns: the Expression whose Tokens were filled in by __parse; its
    //          OriginalFormula is the untouched input.
    // throws : Exception when the text contains no '='.
    Parse: function(formula) {
        // No 'g' flag: exec() on a global regex is stateful (lastIndex) and
        // nothing here needs repeated matching.
        var match = /\s*=(.*)/.exec(formula);
        if (match == null)
            throw new Exception(this, "Parse", "Invalid expression:" + formula);

        // use the local match instead of the deprecated static RegExp.$1.
        var expression = new Expression(match[1]);
        expression.OriginalFormula = formula;
        __parse(expression);
        return expression;
    },

    // parses a fixed list of sample formulas and writes the input, the
    // resulting token list, or the parse error to $Debug.
    Test: function() {
        $Debug.WriteLine("=================== " + this.get_ClassName() + " ================");
        var statement = [
            "=0", "=1", "=9", "=10",
            "=0.1", "=0.0", "=.1", "=.0", "=0.", "=0.9", "=9.0", "=1.9", "=9.1",
            "=1E+10", "=0E-10",
            // formula
            "=1+2",
            "=1+2+3",
            "=1*2+3",
            "=1+2*3",
            "=1+2*3+4",
            "=1+2*3+4*5",
            "=(1+2)+3",
            "=1+(2+3)",
            "=1*(2+3)",
            "=1*(2+3)*(4+5)",
            "=1*(((2+3)))",
            "=1 >=2 || 1 <= 2",
            "=1 >= 2&& 1 <= 2",
            // precedence
            "=1+2-3*4",
            "=1+2-3*4/5",
            "=-1 + 2 - 3 * 4 / 5 + 6 | 7 + 8 & 9 + ~10 + !11",
            "=-1 + +2 + ++++3",
            "=-1 + +2 * +++-3",
            // Boolean
            "=true + FaLsE || TrUe",
            // Function
            "=Sum(1, 2, 3)",
            "=Sum(1 + 2, 3, 4)",
            "=Sum(1, Average(2, 3))",
            "= 1 + Sum(1, 2, 3)",
            "= 1 + -Sum(1, 2, 3)",
            "=-1+-sum(2)*3+4*5",
            "=-1+-+-sum(2)*3+4*5",
            "=-1+-+sum(2,-3,+4,5+6,6+7*8,average(9,10))*11+12/13",
            "=0.9 + 1E-10 + Sum(0.1, 0.2, 0.3, Average(Sum(0.4, 0.5, 0.6), 1, 2, 3)) + 4 * 5",
            "=10"
        ];

        for (var i = 0; i < statement.length; i++) {
            try {
                $Debug.WriteLine("Parsing:\t" + statement[i]);
                var expression = (new FormulaParser()).Parse(statement[i]);
                $Debug.WriteLine("Express:\t" + expression.PrintTokens());
                $Debug.WriteLine("Evaluate:\t");
            } catch (e) {
                $Debug.WriteLine("Failed to parse \"" + statement[i] + "\"\nError:" + e.description);
            }
        }
    } // Test
};

// --------------------------------------------------------
// Internal Functions
// --------------------------------------------------------

// checks if a char is digit.
// checks if a char may start or continue a number: '0'..'9' or '.'.
function __isDigit(ch) {
    return (ch >= '0' && ch <= '9') || (ch == '.');
};

// checks if a char is an ASCII letter.
function __isLetter(ch) {
    return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
};

// checks if a char starts an operator.
function __isOperator(ch) {
    return ch == '+' || ch == '-' || ch == '*' || ch == '/' || ch == '%' ||
           ch == '>' || ch == '<' || ch == '&' || ch == '|' || ch == '!' ||
           ch == '~' || ch == '^';
};

// --------------------------------------------------------
// Number parsing functions
// --------------------------------------------------------

// extracts a number string starting at expression.CurIdx and leaves CurIdx
// on the first char that does not belong to the number.
//
// Accepted forms: x, x., x.y, .y, each optionally followed by an exponent
// E<digits>, E+<digits> or E-<digits> (case-insensitive).
//
// returns: exactly the consumed text.
// throws : Exception("Wrong number format") when the consumed text is not a
//          complete number (e.g. "." or "1E").
function __extractNumber(expression) {
    var start = expression.CurIdx;
    var hasPoint = false, hasExponent = false;

    // gets a number string.
    while (!expression.Eof()) {
        var ch = expression.CurChar();
        if (ch >= '0' && ch <= '9') {
            // plain digit, always part of the number
        } else if (ch == '.') {
            // a second decimal point ends the number
            if (hasPoint)
                break;
            hasPoint = true;
        } else if (ch == 'E' || ch == 'e') {
            if (hasExponent)
                break;
            hasExponent = true;
        } else if (ch == '+' || ch == '-') {
            // a sign belongs to the number only directly after the exponent
            // mark; anywhere else it is the next operator ("1E5+3").
            var prev = expression.CurIdx > start
                ? expression.Formula.charAt(expression.CurIdx - 1)
                : "";
            if (prev != 'E' && prev != 'e')
                break;
        } else {
            break;
        }
        expression.CurIdx++;
    }

    // validate the WHOLE consumed text, so that no consumed char can be
    // silently dropped from the returned number.
    var str = expression.Formula.substring(start, expression.CurIdx);
    if (!/^(?:\d+\.?\d*|\.\d+)(?:E[+-]?\d+)?$/i.test(str))
        throw new Exception(this, "__extractNumber", "Wrong number format");
    return str;
};

// implements the operators sorting function.
// Note: the __parseNumber uses stack.pop() method to fetch the
// saved items, so the comparison is reversed.
function __operatorSortFunc(left, right) {
    if (left.IsPreceding(right))
        return 1;
    else if (right.IsPreceding(left))
        return -1;
    else
        return 0;
};

// pops from expression.Tokens the operators that have to be emitted after
// the number that is about to be pushed, and returns them sorted with
// __operatorSortFunc (the caller pops them off the returned array).
// Tokens that do not belong to the number are pushed back.
function __getNumberRelevantOperators(expression) {
    var tokenArray = [];
    while (expression.Tokens.length > 0) {
        var token = expression.Tokens.pop();
        if (token.HasAncestor("OperatorBase")) {
            if (token.IsUnary()) {
                tokenArray.push(token);
            } else if (token.IsBinary()) {
                // take the binary operator when nothing was collected yet,
                // when the last collected token is unary, or when the last
                // collected token precedes it; otherwise put it back and stop.
                if (tokenArray.length == 0 ||
                    tokenArray[tokenArray.length - 1].IsUnary() ||
                    tokenArray[tokenArray.length - 1].IsPreceding(token)) {
                    tokenArray.push(token);
                } else {
                    expression.Tokens.push(token);
                    break;
                }
            } else {
                // function or parentheses: put it back together with the
                // unary operators collected on top, then stop.
                expression.Tokens.push(token);
                while (tokenArray.length > 0 && tokenArray[tokenArray.length - 1].IsUnary()) {
                    expression.Tokens.push(tokenArray.pop());
                }
                break;
            }
        } else {
            // operand: put it back and stop.
            expression.Tokens.push(token);
            if (tokenArray.length > 0) { // has token?
                if (tokenArray[tokenArray.length - 1].IsUnary()) {
                    // restore unary's operand
                    expression.Tokens.push(tokenArray.pop());
                } else if (tokenArray.length > 1) {
                    if (tokenArray[tokenArray.length - 1].IsPreceding(tokenArray[tokenArray.length - 2]))
                        expression.Tokens.push(tokenArray.pop());
                }
            }
            break;
        }
    } // while

    // Sort by reversed precedence.
    tokenArray.sort(__operatorSortFunc);
    return tokenArray;
}

// parses a number and rearranges the relevant operators so that they follow
// the operand in expression.Tokens; sets LastToken to "operand".
// throws: Exception when the previous token was an operand too.
function __parseNumber(expression) {
    // operand after operand is illegal.
    if (expression.LastToken == "operand") {
        throw new Exception(this, "__parseNumber", "Digit '" + expression.CurChar() + "' should not appear");
    }

    var operand = new OperandNumber(__extractNumber(expression));
    if (expression.LastToken == "operator" || expression.LastToken == "parentheses") {
        var tokenArray = __getNumberRelevantOperators(expression);
        $ASSERT(tokenArray.length > 0);
        expression.Tokens.push(operand);
        // attach operators
        while (tokenArray.length > 0)
            expression.Tokens.push(tokenArray.pop());
    } else {
        expression.Tokens.push(operand);
    }
    expression.LastToken = "operand";
}; // __parseNumber

// extracts a word. A word may be a function name or a cell address string.
// Advances CurIdx by the length of the matched word.
// NOTE(review): the character class also accepts ',' - confirm that this is
// intended before tightening the pattern.
// throws: Exception when no word is found.
function __extractFunctionName(expression) {
    var rest = expression.Formula.substring(expression.CurIdx, expression.Formula.length);
    var match = /[a-z,\$]+\d*/i.exec(rest);
    if (match != null) {
        expression.CurIdx += match[0].length;
        return match[0];
    } else
        throw new Exception(this, "__extractFunctionName", "Word not found");
};

// skips blank chars: moves CurIdx to the next char that is not ' '.
function __skipBlank(expression) {
    while (!expression.Eof()) {
        var ch = expression.CharAt(expression.CurIdx);
        if (ch != ' ')
            break;
        expression.CurIdx++;
    }
};

// returns the text between the '(' at CurIdx and its matching ')' and moves
// CurIdx one char past the position where the scan stopped.
// throws: Exception when CurIdx is not on a '('.
function __extractContentOfParentheses(expression) {
    if (expression.Eof() || expression.CurChar() != '(')
        throw new Exception(this, "__extractContentOfParentheses", "Should start with left parentheses");

    var num = 0;                  // current nesting depth
    var pos = expression.CurIdx;  // position of the opening parenthesis
    while (!expression.Eof()) {
        var ch = expression.CharAt(expression.CurIdx);
        if (ch == '(') {
            num++;
        } else if (ch == ')') {
            num--;
            if (num == 0) {
                break;
            }
        }
        expression.CurIdx++;
    }
    return expression.Formula.substring(pos + 1, expression.CurIdx++);
};

// extracts arguments from a string.
// splits an argument list text at the commas that are not nested inside
// parentheses. An empty text after the last comma is not reported.
function __extractArgumentsFromStr(argumentsStr) {
    var argArray = [];
    var pos = 0, parenthesesNum = 0, arg = "", ch;
    while (pos < argumentsStr.length) {
        ch = argumentsStr.charAt(pos);
        switch (ch) {
            case '(':
                parenthesesNum++;
                arg += ch;
                break;
            case ')':
                parenthesesNum--;
                arg += ch;
                break;
            case ',':
                if (parenthesesNum == 0) {
                    argArray.push(arg);
                    arg = "";
                } else
                    arg += ch;
                break;
            default:
                arg += ch;
                break;
        } // switch
        pos++;
    } // while
    if (arg.length > 0)
        argArray.push(arg);
    return argArray;
}

// parses a word: either a boolean literal (true/false, any case) or a
// function call including its parenthesized argument list.
// For a function call the emitted tokens are: the tokens of every argument,
// the number of arguments, the function, then the operators taken back from
// the end of expression.Tokens.
// throws: Exception when a word is not allowed after the previous token.
function __parseFunction(expression) {
    if (!(expression.LastToken == "none" || expression.LastToken == "operator" || expression.LastToken == "parentheses")) {
        throw new Exception(this, "__parseFunction", "'" + expression.CurChar() + "' should not appear here");
    }

    var word = __extractFunctionName(expression);

    // boolean?
    var lowerWord = word.toLowerCase();
    if (lowerWord == "true" || lowerWord == "false") {
        expression.Tokens.push(new OperandBoolean(word));
        expression.LastToken = "operand";
        return;
    }

    __skipBlank(expression);

    // Is cell ?
    // TODO

    // Function parameter
    var paramStrList = __extractArgumentsFromStr(__extractContentOfParentheses(expression));
    var paramStack = [];
    var tempExp;
    for (var i = 0; i < paramStrList.length; i++) {
        tempExp = new Expression(paramStrList[i]);
        __parse(tempExp);
        paramStack = paramStack.concat(tempExp.Tokens);
    }
    // the number of parameters
    paramStack.push(new OperandNumber(paramStrList.length));
    // the function(as an operator)
    paramStack.push(new FunctionUnknown(word));

    // Sort previous tokens
    var tokenArray = [];
    if (expression.LastToken == "operator" || expression.LastToken == "parentheses") {
        // keep the last operator
        tokenArray.push(expression.Tokens.pop());
        while (expression.Tokens.length > 0) {
            var token = expression.Tokens.pop();
            if (!token.HasAncestor("OperatorBase")) {
                expression.Tokens.push(token);
                break;
            } else
                tokenArray[tokenArray.length] = token;
            if (token.IsBinary())
                break;
        }
    }

    expression.Tokens = expression.Tokens.concat(paramStack.concat(tokenArray));
    expression.LastToken = "function";
}; // __parseFunction

// tells if the char right after CurIdx exists and equals `expected`.
function __nextCharIs(expression, expected) {
    return expression.CurIdx < expression.Length() - 1 &&
           expression.CharAt(expression.CurIdx + 1) == expected;
}

// parses the operator at CurIdx, pushes the matching operator token, sets
// LastToken to "operator" and moves CurIdx past the operator.
// '+' and '-' are unary when the previous token is "none" or "operator".
// throws: Exception when the operator is not allowed at this place or is
//         not supported.
function __parseOperator(expression) {
    var ch = expression.CurChar();
    var last = expression.LastToken;
    var isUnaryPosition = (last == "none" || last == "operator");

    if (!(ch == "+" || ch == "-")) {
        if (!(last == "operand" || last == "function" || last == "operator" || last == "parentheses"))
            throw new Exception(this, "__parseOperator", "Operator'" + ch + "' should not appears");
    } else {
        // Unary +/- ? It must be followed directly by a non-blank char.
        if (isUnaryPosition)
            if (!(expression.CurIdx < expression.Length() - 1 && expression.CharAt(expression.CurIdx + 1) != ' '))
                throw new Exception(this, "__parseOperator", "Space should not appear after unary operatorn '" + ch + "'");
    }

    switch (ch) {
        case "+":
            if (isUnaryPosition)
                expression.Tokens.push(new OperatorPositive());
            else
                expression.Tokens.push(new OperatorAdd());
            break;
        case "-":
            if (isUnaryPosition)
                expression.Tokens.push(new OperatorNegative());
            else
                expression.Tokens.push(new OperatorSubtract());
            break;
        case "*":
            expression.Tokens.push(new OperatorMultiply());
            break;
        case "/":
            expression.Tokens.push(new OperatorDivide());
            break;
        case "%":
            expression.Tokens.push(new OperatorModule());
            break;
        case "&":
            if (__nextCharIs(expression, '&')) {
                expression.Tokens.push(new OperatorLogicAnd());
                expression.CurIdx++; // second char of "&&"
            } else {
                // Excel uses 'AND' as the logic and
                // uses the '&' as string concat.
                expression.Tokens.push(new OperatorStringConcat());
            }
            break;
        case "|":
            if (__nextCharIs(expression, '|')) {
                expression.Tokens.push(new OperatorLogicOr());
                expression.CurIdx++; // second char of "||"
            } else {
                expression.Tokens.push(new OperatorBitwiseOr());
            }
            break;
        case ">":
            if (__nextCharIs(expression, '=')) {
                expression.Tokens.push(new OperatorGreaterEqual());
                expression.CurIdx++; // second char of ">="
            } else
                expression.Tokens.push(new OperatorGreaterThan());
            break;
        case "<":
            if (__nextCharIs(expression, '=')) {
                expression.Tokens.push(new OperatorLessEqual());
                expression.CurIdx++; // second char of "<="
            } else
                expression.Tokens.push(new OperatorLessThan());
            break;
        case "~":
            expression.Tokens.push(new OperatorBitwiseNot());
            break;
        case "!":
            expression.Tokens.push(new OperatorLogicNot());
            break;
        default:
            throw new Exception(this, "__parseOperator", "Unsupported OperatorBase:" + ch);
    } // switch

    expression.LastToken = "operator";
    expression.CurIdx++;
}; // __parseOperator

// parses the string literal whose opening '"' is at CurIdx: pushes an
// OperandString with the text up to the next '"' (or up to the end of the
// formula when there is none) and moves CurIdx past the closing position.
function __parseString(expression) {
    var pos = expression.CurIdx + 1, ch;
    while (pos < expression.Length()) {
        ch = expression.CharAt(pos);
        if (ch == '"')
            break;
        pos++;
    }
    expression.LastToken = "operand";
    expression.Tokens.push(new OperandString(expression.Formula.substring(expression.CurIdx + 1, pos)));
    expression.CurIdx = pos + 1;
}; // __parseString

// parses a parenthesized argument list that follows a function token: the
// function token is taken off expression.Tokens, the tokens of every
// argument are appended, then the number of arguments and finally the
// function token again.
function __parseArguments(expression) {
    var num = 1, pos = expression.CurIdx + 1, ch;
    // find the matching right parenthesis
    while (pos < expression.Length()) {
        ch = expression.CharAt(pos);
        if (ch == '(') {
            num++;
        } else if (ch == ')') {
            num--;
            if (num == 0) {
                break;
            }
        }
        pos++;
    }

    var strList = expression.Formula.substring(expression.CurIdx + 1, pos).split(",");
    var paramList = [];
    for (var n = 0; n < strList.length; n++) {
        // strip the leading blanks; use the local match, not RegExp.$1
        var match = /\s*(.*)\s*/.exec(strList[n]);
        if (match)
            paramList.push(match[1]);
    }

    var token = expression.Tokens.pop();
    var tempFormulas = [];
    var tempExp;
    // push parameter
    for (var p = 0; p < paramList.length; p++) {
        tempExp = new Expression(paramList[p]);
        __parse(tempExp);
        tempFormulas.push(tempExp);
    } // for

    for (var f = 0; f < tempFormulas.length; f++) {
        for (var m = 0; m < tempFormulas[f].Tokens.length; m++) {
            expression.Tokens.push(tempFormulas[f].Tokens[m]);
        } // for
    } // for

    // the number of parameters is an operand, as in __parseFunction
    expression.Tokens.push(new OperandNumber(paramList.length));
    expression.Tokens.push(token);
    expression.CurIdx = pos + 1;
};

// parses the '(' at CurIdx. After a function token it is an argument list;
// otherwise the content up to the matching ')' is parsed as a nested
// expression whose tokens are appended, followed by the operator that stood
// directly before the parenthesis (if the last token was an operator).
// throws: Exception when a parenthesis is not allowed after the last token.
function __parseParentheses(expression) {
    // function arguments
    if (expression.LastToken == "function") {
        __parseArguments(expression);
        return;
    }

    if (!(expression.LastToken == "operator" || expression.LastToken == "parentheses" || expression.LastToken == "none"))
        throw new Exception(this, "__parseParentheses", "Unrespected branch, last token is " + expression.LastToken);

    var num = 1, pos = expression.CurIdx + 1, ch;
    // find the matching right parenthesis
    while (pos < expression.Length()) {
        ch = expression.CharAt(pos);
        if (ch == '(')
            num++;
        else if (ch == ')') {
            num--;
            if (num == 0)
                break;
        }
        pos++;
    } // while

    var lastToken = expression.LastToken == "operator" ? expression.Tokens.pop() : null;
    var tempFormula = new Expression(expression.Formula.substring(expression.CurIdx + 1, pos));
    __parse(tempFormula);
    for (var n = 0; n < tempFormula.Tokens.length; n++)
        expression.Tokens.push(tempFormula.Tokens[n]);
    if (lastToken != null)
        expression.Tokens.push(lastToken);
    expression.CurIdx = pos + 1;
    expression.LastToken = "parentheses";
}; // __parseParentheses

// scans the formula of `expression` from CurIdx to the end and dispatches
// every char to the matching parse function, which fills expression.Tokens.
// An error raised while parsing is written to $Debug and stops the scan; it
// is not propagated to the caller.
// throws: Exception when more iterations than formula chars were needed.
function __parse(expression) {
    var iteratorCount = 0;
    while (!expression.Eof()) {
        var ch = expression.CharAt(expression.CurIdx);
        try {
            if (__isLetter(ch)) {
                __parseFunction(expression);
            } else if (__isDigit(ch)) {
                __parseNumber(expression);
            } else if (__isOperator(ch)) {
                __parseOperator(expression);
            } else {
                switch (ch) {
                    case ' ':
                        expression.CurIdx++;
                        break;
                    case '"':
                        __parseString(expression);
                        break;
                    case '(':
                        __parseParentheses(expression);
                        break;
                    default:
                        throw new Exception(this, "__parse", "Unknown char:'" + ch + "'");
                } // switch
            }
        } catch (e) {
            $Debug.WriteLine("Error at " + expression.CurIdx + ", char '" + ch + "'\nError:" + e.description);
            break;
        }

        // guard against a parse function that does not advance CurIdx
        iteratorCount++;
        if (iteratorCount > expression.Formula.length)
            throw new Exception(this, "__parse", "Fall into endless loop");
    }
};
代码比较长，其中还包含了测试代码。