缘起
最近拜读前桥和弥[日]的<<自制编程语言>>
开头一章便是教读者使用lex/yacc工具
制作四则运算器
其中yacc的移进/归约/梯度下降的思想很有启发
于是使用golang练习之
目标
- 制作一个四则运算器, 从os.Stdin读入一行表达式, 然后输出计算过程和结果
- 支持+ - * /
- 支持左右括号
- 支持负数
难点
记号扫描(lexer)
- 逐字符扫描记号
- 单字符记号可直接识别, + - * / ( )
多字符记号, 此处只有浮点数, 通过有限状态的转换进行识别
+ '-' = INT_STATUS
+ 'd' = INT_STATUS
+ '.' = DOT_STATUS
+ 'SPACE | + | - | * | / | ( | )' = INITIAL
+ 'd' = FRAG_STATUS
+ 'SPACE | + | - | * | / | ( | )' = INITIAL
运算优先级
- /优先级最高, 可以立即归约计算
- 括号次之, 遇到右括号, 应当触发+ -归约
- 程序末尾, 没有新的记号剩余, 对+ -进行归约
识别负数
- 简单起见, 本程序总是使用浮点数作为基本计算单位
- 把负号识别为浮点数的可选部分: 浮点数 = -?d+(.d+)?
总体流程
- 从os.Stdin读入一行表达式字符串
- 逐字符扫描记号流, 放入记号队列
- 逐记号出队, 置入计算栈
- 判断栈顶是否符合归约条件, 是则进行计算
- 记号队列空, 对计算栈进行最终计算
- 输出结果
main.go
从os.Stdin循环读入行
调用lexer.Parse获得记号流
调用parser.Parse进行计算
func main() { reader := bufio.NewReader(os.Stdin) for { fmt.Printf("=> ") arrBytes, _, err := reader.ReadLine() if err != nil { panic(err.Error()) } line := strings.TrimSpace(string(arrBytes)) expression := line tokens, e := lexer.Parse(expression) if e != nil { println(e.Error()) } else { e,v := parser.Parse(tokens) if e != nil { println(e.Error()) } fmt.Println(strconv.FormatFloat(v, 'f', 10, 64)) } } }
tokens/tokens.go
定义记号
package tokens type TOKENS string const IntLiteral TOKENS = "INT" const DoubleLiteral TOKENS = "DBL" const ADD TOKENS = "ADD" const SUB TOKENS = "SUB" const MUL TOKENS = "MUL" const DIV TOKENS = "DIV" const LB TOKENS = "LB" const RB TOKENS = "RB" const UNKNOWN TOKENS = "UNKNOWN" type Token struct { Token TOKENS Value string Position int } func OfRune(t TOKENS, r rune, from int) *Token { return &Token { Token: t, Value : string(r), Position: from, } } func OfString(t TOKENS, s string, from int) *Token { return &Token { Token: t, Value : s, Position: from, } }
states/states.go
定义lexer的状态
type STATES int const INITIAL STATES = 1 const INT_STATUS STATES = 11 const DOT_STATUS STATES = 12 const FRAG_STATUS STATES = 13
lexer/lexer.go
记号扫描
type tLexerState struct { state states.STATES tokens []*tokens.Token buffer []rune i0 int i1 int d0 int d1 int } func (me *tLexerState) AppendToken(t *tokens.Token) { me.tokens = append(me.tokens, t) } func (me *tLexerState) AppendChar(it rune) { me.buffer = append(me.buffer, it) } func (me *tLexerState) BufferSize() int { return len(me.buffer) } func (me *tLexerState) IntSize() int { return me.i1 - me.i0 + 1 } func (me *tLexerState) FragSize() int { return me.d1 - me.d0 + 1 } func Parse(line string) ([]*tokens.Token, error) { var state = &(tLexerState{ state: states.INITIAL, tokens: make([]*tokens.Token, 0), buffer: make([]rune, 0), i0 : 0, i1 : 0, d0: 0, d1: 0, }) for i, it := range line + "\n" { e := parseChar(state, i, it) if e != nil { return nil, e } } return state.tokens, nil } func parseChar(state *tLexerState, i int, it rune) error { var e error = nil switch state.state { case states.INITIAL: e = parseCharWhenInitial(state, i, it) break case states.INT_STATUS: e = parseCharWhenInt(state, i, it) break case states.DOT_STATUS: e = parseCharWhenDot(state, i, it) break case states.FRAG_STATUS: e = parseCharWhenFrag(state, i, it) break } return e } func parseCharWhenInitial(state *tLexerState, i int, it rune) error { if is_minus(it) || is_0_to_9(it) { state.state = states.INT_STATUS state.buffer = make([]rune, 0) state.buffer = append(state.buffer, it) state.i0 = i state.i1 = i } else if is_space(it){ return nil } else if is_add(it) { state.AppendToken(tokens.OfRune(tokens.ADD, it, i)) } else if is_sub(it) { state.AppendToken(tokens.OfRune(tokens.SUB, it, i)) } else if is_mul(it) { state.AppendToken(tokens.OfRune(tokens.MUL, it, i)) } else if is_div(it) { state.AppendToken(tokens.OfRune(tokens.DIV, it, i)) } else if is_lb(it) { state.AppendToken(tokens.OfRune(tokens.LB, it, i)) } else if is_rb(it) { state.AppendToken(tokens.OfRune(tokens.RB, it, i)) } else { return errors.New(fmt.Sprintf("parseCharWhenInitial, invalid char %c at %d", it, i)) } return nil } func parseCharWhenInt(state *tLexerState, i int, it rune) error { if is_0_to_9(it) { if state.BufferSize() >= 10 { return errors.New(fmt.Sprintf("too large int number at %v", i)) } else { state.AppendChar(it) state.i1 = i } } else if is_dot(it) { state.AppendChar(it) state.state = states.DOT_STATUS } else if is_space(it) { state.AppendToken(tokens.OfString(tokens.IntLiteral, string(state.buffer), state.i1)) state.state = states.INITIAL } else if is_rb(it) { state.AppendToken(tokens.OfString(tokens.IntLiteral, string(state.buffer), state.i1)) state.AppendToken(tokens.OfRune(tokens.RB, it, i)) state.state = states.INITIAL } else if is_add(it) { state.AppendToken(tokens.OfString(tokens.IntLiteral, string(state.buffer), state.i1)) state.AppendToken(tokens.OfRune(tokens.ADD, it, i)) state.state = states.INITIAL } else if is_sub(it) { state.AppendToken(tokens.OfString(tokens.IntLiteral, string(state.buffer), state.i1)) state.AppendToken(tokens.OfRune(tokens.SUB, it, i)) state.state = states.INITIAL } else if is_mul(it) { state.AppendToken(tokens.OfString(tokens.IntLiteral, string(state.buffer), state.i1)) state.AppendToken(tokens.OfRune(tokens.MUL, it, i)) state.state = states.INITIAL } else if is_div(it) { state.AppendToken(tokens.OfString(tokens.IntLiteral, string(state.buffer), state.i1)) state.AppendToken(tokens.OfRune(tokens.DIV, it, i)) state.state = states.INITIAL } else { return errors.New(fmt.Sprintf("parseCharWhenInt, invalid char %c at %d", it, i)) } return nil } func parseCharWhenDot(state *tLexerState, i int, it rune) error { if is_0_to_9(it) { state.state = states.FRAG_STATUS state.AppendChar(it) state.d0 = i state.d1 = i } else { return errors.New(fmt.Sprintf("parseCharWhenDot, invalid char %c at %d", it, i)) } return nil } func parseCharWhenFrag(state *tLexerState, i int, it rune) error { if is_0_to_9(it) { if state.FragSize() >= 10 { return errors.New(fmt.Sprintf("too many chars for a double value at %d", i)) } else { state.AppendChar(it) state.d1 = i } } else if is_space(it) { state.AppendToken(tokens.OfString(tokens.DoubleLiteral, string(state.buffer), state.i1)) state.state = states.INITIAL } else if is_add(it) { state.AppendToken(tokens.OfString(tokens.DoubleLiteral, string(state.buffer), state.i1)) state.AppendToken(tokens.OfRune(tokens.ADD, it, i)) state.state = states.INITIAL } else if is_sub(it) { state.AppendToken(tokens.OfString(tokens.DoubleLiteral, string(state.buffer), state.i1)) state.AppendToken(tokens.OfRune(tokens.SUB, it, i)) state.state = states.INITIAL } else if is_mul(it) { state.AppendToken(tokens.OfString(tokens.DoubleLiteral, string(state.buffer), state.i1)) state.AppendToken(tokens.OfRune(tokens.MUL, it, i)) state.state = states.INITIAL } else if is_div(it) { state.AppendToken(tokens.OfString(tokens.DoubleLiteral, string(state.buffer), state.i1)) state.AppendToken(tokens.OfRune(tokens.DIV, it, i)) state.state = states.INITIAL } else if is_rb(it) { state.AppendToken(tokens.OfString(tokens.DoubleLiteral, string(state.buffer), state.i1)) state.AppendToken(tokens.OfRune(tokens.RB, it, i)) state.state = states.INITIAL } else { return errors.New(fmt.Sprintf("parseCharWhenFrag, invalid char %c at %d", it, i)) } return nil }
parser/tStackNode.go
定义计算栈的一个节点. 计算栈中有两种节点: 已归约的值节点, 和尚未计算的记号节点
type tStackNodeType int const NODE_VALUE tStackNodeType = 1 const NODE_TOKEN tStackNodeType = 2 type tStackNode struct { flag tStackNodeType token *tokens.Token value float64 } func newValueNode(value float64) *tStackNode { return &tStackNode{ flag: NODE_VALUE, value: value, token: nil, } } func newTokenNode(token *tokens.Token) *tStackNode { return &tStackNode{ flag: NODE_TOKEN, token: token, value: 0, } } func (me *tStackNode) getTokenType() tokens.TOKENS { switch me.flag { case NODE_VALUE: return tokens.DoubleLiteral case NODE_TOKEN: switch me.token.Token { case tokens.IntLiteral: fallthrough case tokens.DoubleLiteral: return tokens.DoubleLiteral default: return me.token.Token } } return tokens.UNKNOWN } func (me *tStackNode) getDoubleValue() float64 { switch me.flag { case NODE_VALUE: return me.value case NODE_TOKEN: switch me.token.Token { case tokens.IntLiteral: fallthrough case tokens.DoubleLiteral: v1,e1 := strconv.ParseFloat(me.token.Value, 64) if e1 != nil { panic(e1) } return v1 } } panic("value not avaiable") }
parser/parser.go
type tParser struct { tokens []*tokens.Token stack []*tStackNode total int position int } func newParser(tokens []*tokens.Token) *tParser { return &tParser{ tokens: tokens, stack: make([]*tStackNode, 0), total : len(tokens), position: -1, } } func (me *tParser) showStack() string { lst := make([]string, 0) for _,it := range me.stack { switch it.flag { case NODE_VALUE: lst = append(lst, strconv.FormatFloat(it.value, 'f', 10, 64)) break case NODE_TOKEN: switch it.token.Token { case tokens.DoubleLiteral: fallthrough case tokens.IntLiteral: lst = append(lst, it.token.Value) break default: lst = append(lst, it.token.Value) break } } } return strings.Join(lst, " ") } func (me *tParser) moreToken() bool { return me.position < me.total - 1 } func (me *tParser) nextToken() *tokens.Token { if !me.moreToken() { return nil } me.position++ return me.currentToken() } func (me *tParser) currentToken() *tokens.Token { if me.position >= me.total { return nil } return me.tokens[me.position] } func (me *tParser) reduce() { sCurrentStack := "" var fnCheckState = func() { sStackState := me.showStack() if sStackState != sCurrentStack { sCurrentStack = sStackState fmt.Printf("stack => %s\n", sStackState) } } for { fnCheckState() if me.reduceMulOrDiv() { continue } if me.reduceBrackets() { continue } if !me.moreToken() { if me.reduceAddOrSub() { break } } fnCheckState() //time.Sleep(1*time.Second) break } } func (me *tParser) stackPop() *tStackNode { if me.isStackEmpty() { return nil } var iStackSize = len(me.stack) var last = me.stack[iStackSize - 1] me.stack = me.stack[:(iStackSize-1)] return last } func (me *tParser) stackPopN(n int) []*tStackNode { if me.isStackEmpty() { return nil } var iStackSize = len(me.stack) if iStackSize < n { return nil } var lstTailItems = me.stack[(iStackSize - n):] me.stack = me.stack[:(iStackSize-n)] return lstTailItems } func (me *tParser) stackTakeN(n int) []*tStackNode { if me.isStackEmpty() { return nil } var iStackSize = len(me.stack) if iStackSize < n { return nil } var lstHeadItems = me.stack[:n] me.stack = me.stack[n+1:] return lstHeadItems } func (me *tParser) stackPush(it *tStackNode) { me.stack = append(me.stack, it) } func (me *tParser) reduceMulOrDiv() bool { if me.isStackEmpty() { return false } if me.isStackRMatch(tokens.DoubleLiteral, tokens.MUL, tokens.DoubleLiteral) { var lstTailNodes = me.stackPopN(3) v1 := lstTailNodes[0].getDoubleValue() v2 := lstTailNodes[2].getDoubleValue() var v = v1*v2 me.stackPush(newValueNode(v)) return true } else if me.isStackRMatch(tokens.DoubleLiteral, tokens.DIV, tokens.DoubleLiteral) { var lstTailNodes = me.stackPopN(3) v1 := lstTailNodes[0].getDoubleValue() v2 := lstTailNodes[2].getDoubleValue() if v2 == 0 { panic(fmt.Sprintf("div by zero, %v / %v", v1, v2)) } var v = v1/v2 me.stackPush(newValueNode(v)) return true } return false } func (me *tParser) reduceBrackets() bool { if me.isStackEmpty() { return false } if me.isStackRMatch(tokens.RB) { rb := me.stackPop() var v float64 = 0 for { if me.isStackRMatch(tokens.LB, tokens.DoubleLiteral) { var lstTailNodes = me.stackPopN(2) v += lstTailNodes[1].getDoubleValue() me.stackPush(newValueNode(v)) return true } else if me.isStackRMatch(tokens.ADD, tokens.DoubleLiteral) { var lstTailNodes = me.stackPopN(2) v1 := lstTailNodes[1].getDoubleValue() v = v + v1 } else if me.isStackRMatch(tokens.SUB, tokens.DoubleLiteral) { var lstTailNodes = me.stackPopN(2) v1 := lstTailNodes[1].getDoubleValue() v = v - v1 } else { panic(fmt.Sprintf("LB not found at %v", rb.token.Position)) } } } return false } func (me *tParser) reduceAddOrSub() bool { var v float64 = 0 for { if me.isStackEmpty() { break } if me.isStackRMatch(tokens.ADD, tokens.DoubleLiteral) { var lstTailNodes = me.stackPopN(2) v1 := lstTailNodes[1].getDoubleValue() v = v + v1 } else if me.isStackRMatch(tokens.SUB, tokens.DoubleLiteral) { var lstTailNodes = me.stackPopN(2) v1 := lstTailNodes[1].getDoubleValue() v = v - v1 } else if len(me.stack)==1 && me.isStackRMatch(tokens.DoubleLiteral) { it := me.stackPop() v += it.getDoubleValue() me.stackPush(newValueNode(v)) return true } else { panic("invalid expression") } } return false } func (me *tParser) isStackEmpty() bool { return len(me.stack) == 0 } func (me *tParser) isStackRMatch(args...tokens.TOKENS) bool { var iArgsSize = len(args) if iArgsSize <= 0 { return false } var iStackSize = len(me.stack) if iStackSize < iArgsSize { return false } for i,it := range args { var n = iStackSize - iArgsSize + i var xStackNode = me.stack[n] if it != xStackNode.getTokenType() { return false } } return true } func (me *tParser) isStackLMatch(args...tokens.TOKENS) bool { var iArgsSize = len(args) if iArgsSize <= 0 { return false } var iStackSize = len(me.stack) if iStackSize < iArgsSize { return false } for i,it := range args { var xStackNode = me.stack[i] if it != xStackNode.getTokenType() { return false } } return true } func (me *tParser) parse() (error, float64) { for { t := me.nextToken() if t == nil { break } me.stackPush(newTokenNode(t)) me.reduce() } var iStackSize = len(me.stack) if iStackSize == 1 && me.stack[0].getTokenType() == tokens.DoubleLiteral { return nil, me.stack[0].getDoubleValue() } panic(fmt.Sprintf("failed parsing expression %s", me.showStack())) } func Parse(tokens []*tokens.Token) (error, float64) { parser := newParser(tokens) return parser.parse() }
输出
=> 1+2*(3-4*(5/6+(7-8*9)))
stack => 1
stack => 1 +
stack => 1 + 2
stack => 1 + 2 *
stack => 1 + 2 * (
stack => 1 + 2 * ( 3
stack => 1 + 2 * ( 3 -
stack => 1 + 2 * ( 3 - 4
stack => 1 + 2 * ( 3 - 4 *
stack => 1 + 2 * ( 3 - 4 * (
stack => 1 + 2 * ( 3 - 4 * ( 5
stack => 1 + 2 * ( 3 - 4 * ( 5 /
stack => 1 + 2 * ( 3 - 4 * ( 5 / 6
stack => 1 + 2 * ( 3 - 4 * ( 0.8333333333
stack => 1 + 2 * ( 3 - 4 * ( 0.8333333333 +
stack => 1 + 2 * ( 3 - 4 * ( 0.8333333333 + (
stack => 1 + 2 * ( 3 - 4 * ( 0.8333333333 + ( 7
stack => 1 + 2 * ( 3 - 4 * ( 0.8333333333 + ( 7 -
stack => 1 + 2 * ( 3 - 4 * ( 0.8333333333 + ( 7 - 8
stack => 1 + 2 * ( 3 - 4 * ( 0.8333333333 + ( 7 - 8 *
stack => 1 + 2 * ( 3 - 4 * ( 0.8333333333 + ( 7 - 8 * 9
stack => 1 + 2 * ( 3 - 4 * ( 0.8333333333 + ( 7 - 72.0000000000
stack => 1 + 2 * ( 3 - 4 * ( 0.8333333333 + ( 7 - 72.0000000000 )
stack => 1 + 2 * ( 3 - 4 * ( 0.8333333333 + -65.0000000000
stack => 1 + 2 * ( 3 - 4 * ( 0.8333333333 + -65.0000000000 )
stack => 1 + 2 * ( 3 - 4 * -64.1666666667
stack => 1 + 2 * ( 3 - -256.6666666667
stack => 1 + 2 * ( 3 - -256.6666666667 )
stack => 1 + 2 * 259.6666666667
stack => 1 + 519.3333333333
520.3333333333
=>
以上就是golang 四则运算计算器 yacc 归约手写实现的详细内容,更多关于golang四则运算计算器yacc归约的资料请关注脚本之家其它相关文章!