LR(0) 文法分析器

实验介绍

LR(0)分析表是LR(0)分析器的重要组成部分,它是总控程序分析动作的依据。对于不同的文法,LR(0)分析表不同,它可以用一个二维数组表示,行标为状态号,列标为文法符号和'#'号。分析表的内容由两部分组成:一部分为动作(ACTION)表,它表示当前状态下面临输入符时应做的动作是移进、归约、接受或出错,动作表的列标只包含终结符和'#';另一部分为转换(GOTO)表,它表示在当前状态下面临文法符号时应转向的下一个状态,相当于识别活前缀的有限自动机DFA的状态转换矩阵。因此构造一个文法的LR(0)分析表时,首先应构造识别活前缀的自动机DFA,这样可以很方便地利用DFA的项目集和状态转换函数构造它的LR(0)分析表。在实际应用中为了节省存储空间,通常把关于终结符部分的GOTO表和ACTION表重叠,也就是把当前状态下面临终结符应做的移进-归约动作和转向动作用同一数组元素表示。

实验代码

src.txt 保存了文法(即 main.py 中 read_grammar('src.txt') 读取的文件)

Y->S
S->BB
B->aB
B->b

main.py 保存了程序源码

VN = []  # non-terminal symbols, in order of first appearance as a left-hand side
VT = []  # terminal symbols; find_term_non appends '#' as the last entry
NFA = []  # item-level transition matrix built by make_nfa ('*' marks an epsilon arc)
DFA = []  # transition matrix built by make_dfa (rows/columns indexed by dotted item)
grammar = []  # productions read from the grammar file, one string per line
doted_grammar = []  # dotted items, each stored as [item_string, 'true'/'false' flag]
VN2Int = {}  # non-terminal -> column index used in the goto table
VT2Int = {}  # terminal -> column index used in the action table
action = []  # ACTION table
goto = []  # GOTO table
DFA_node = []  # item strings belonging to each DFA state
status_stack = []  # state stack
symbol_stack = []  # symbol stack
now_state = ''  # state currently on top of the state stack
input_ch = ''  # current input character (taken from input_str[location])
input_str = ''  # input string to analyse
location = 0  # current position in input_str
now_step = 0  # current step number printed by output()


# Read the grammar
def read_grammar(file_name):
    """Append every production found in *file_name* to the global ``grammar``.

    Each non-blank line of the file is stored as one production string with
    its line terminator ('\\n' or '\\r\\n') removed. Blank lines are skipped
    because they are not productions and cannot be split on '->' later.

    :param file_name: path of the text file holding one production per line.
    """
    global grammar
    # The ``with`` statement closes the file; no explicit close() is needed.
    with open(file_name, 'r') as file:
        for line in file:
            production = line.rstrip('\r\n')
            if production.strip():
                grammar.append(production)


# Collect the terminal and non-terminal symbols
def find_term_non():
    """Fill VN, VT, VN2Int and VT2Int from the loaded grammar.

    The left-hand side of every production becomes a non-terminal. Every
    right-hand-side character that is neither a non-terminal nor already
    recorded becomes a terminal. '#' is appended as the last terminal.
    """
    global grammar
    rhs_symbols = []
    next_vn_index = 0
    for production in grammar:
        lhs, rhs = production.split('->')
        if lhs not in VN:
            VN.append(lhs)
            VN2Int[lhs] = next_vn_index
            next_vn_index += 1
        rhs_symbols.extend(rhs)

    # Terminals are classified only after all non-terminals are known.
    next_vt_index = 0
    for symbol in rhs_symbols:
        if symbol in VN or symbol in VT:
            continue
        VT.append(symbol)
        VT2Int[symbol] = next_vt_index
        next_vt_index += 1
    VT.append('#')
    VT2Int['#'] = next_vt_index


# Insert a dot at a given position of a string
def add_char2str(grammar_i, i):
    """Return *grammar_i* with a '.' inserted before index *i*."""
    head, tail = grammar_i[:i], grammar_i[i:]
    return head + '.' + tail


# Add dots to the grammar
def add_dot():
    """Append every dotted item of every production to ``doted_grammar``.

    For each production string, one item is generated per dot position from
    index 3 (just after the one-character left-hand side and '->') up to the
    end of the string. Each item is stored as ``[item_string, 'false']``.
    """
    global doted_grammar
    for production in grammar:
        for dot_pos in range(3, len(production) + 1):
            # Append a complete row at once so the result does not depend on
            # how many rows doted_grammar already holds.
            doted_grammar.append([add_char2str(production, dot_pos), 'false'])


# Print the dotted grammar
def print_doted_grammar():
    """Print every dotted item, numbered from 1."""
    print('----加点后的文法----')
    for number, item in enumerate(doted_grammar, start=1):
        print('%d.%s' % (number, item[0]))


# Print the grammar that was read
def print_read_grammar():
    """Print every loaded production, numbered from 0."""
    print('----读入的文法----')
    for number, production in enumerate(grammar):
        print('(%d)%s' % (number, production))


# Initialise the NFA
def init_nfa():
    """Reset ``NFA`` to a square matrix of empty strings.

    The matrix has one row and one column per dotted item. The list object
    is rebuilt in place, so calling this again gives a clean matrix instead
    of appending cells to rows that already exist.
    """
    global NFA
    size = len(doted_grammar)
    NFA[:] = [[''] * size for _ in range(size)]


# Locate the dot
def find_pos_point(one_grammar):
    """Return the index of the first '.' in *one_grammar*, or -1 if absent."""
    dot_index = one_grammar.find('.')
    return dot_index


# Does the item start with `start` and have its dot right after '->'?
def is_start(grammar_i, start):
    """Return True when the item's first character is *start* and index 3 is '.'.

    *grammar_i* is an item entry whose element 0 is the item string.
    """
    item = grammar_i[0]
    begins_with_start = item.find(start, 0, 1) == 0
    dot_after_arrow = item.find('.', 3, 4) == 3
    return begins_with_start and dot_after_arrow


# Find the items that start with `start` and have the dot right after '->'
def find_node(start, grammar_id):
    """Store the indices of all matching items in *grammar_id* and count them.

    An item matches when ``is_start(item, start)`` is true. Indices are
    written to ``grammar_id[0]``, ``grammar_id[1]``, ... in item order; if
    the list is too short it is extended rather than raising IndexError.

    :param start: the symbol the item's left-hand side must equal.
    :param grammar_id: caller-provided list that receives the item indices.
    :return: the number of matching items.
    """
    num = 0
    # enumerate gives each item's own position, which list.index() does not
    # guarantee when two entries compare equal.
    for index, item in enumerate(doted_grammar):
        if not is_start(item, start):
            continue
        if num < len(grammar_id):
            grammar_id[num] = index
        else:
            grammar_id.append(index)
        num += 1
    return num


# Build the NFA
def make_nfa():
    """Fill the ``NFA`` matrix with the transitions between dotted items.

    For every item whose dot is not at the end, the symbol after the dot
    labels the arc to the next item (the same production with the dot moved
    one place right). If that symbol is a non-terminal, every item that
    starts one of its productions is linked with an epsilon arc ('*'), and
    add_more follows those items further.
    """
    global NFA
    # One slot per item is the most find_node can ever fill, so the scratch
    # buffer cannot overflow however many productions a symbol has.
    grammar_id = [''] * len(doted_grammar)
    init_nfa()
    for i, grammar_i in enumerate(doted_grammar):
        item = grammar_i[0]
        pos_point = find_pos_point(item)  # position of the dot
        if pos_point + 1 == len(item):
            continue  # dot at the end: no outgoing arc
        next_symbol = item[pos_point + 1]
        NFA[i][i + 1] = next_symbol
        if next_symbol in VN:  # the dot is followed by a non-terminal
            count = find_node(next_symbol, grammar_id)
            for k in range(count):
                NFA[i][grammar_id[k]] = '*'
                add_more(i, grammar_id[k])


# Follow further epsilon links
def add_more(i, j):
    """Extend item *i*'s epsilon arcs with the items reachable through item *j*.

    If the dot of item *j* is followed by a non-terminal, every item that
    starts one of that non-terminal's productions gets an epsilon arc ('*')
    from item *i*, and the search continues from each newly linked item.

    :param i: row of ``NFA`` that receives the epsilon arcs.
    :param j: index of the item whose symbol after the dot is examined.
    """
    global NFA
    item = doted_grammar[j][0]
    pos_point = find_pos_point(item)
    if pos_point + 1 == len(item):
        return  # dot at the end: nothing follows
    next_symbol = item[pos_point + 1]
    if next_symbol not in VN:
        return
    # One slot per item is the most find_node can ever fill.
    grammar_id = [''] * len(doted_grammar)
    count = find_node(next_symbol, grammar_id)
    for target in grammar_id[:count]:
        if NFA[i][target] == '*':
            # Already linked: skipping it keeps recursive grammars
            # (e.g. S->Sa) from recursing forever.
            continue
        NFA[i][target] = '*'
        add_more(i, target)


# Initialise the DFA
def init_dfa():
    """Reset ``DFA`` to a square matrix of empty strings.

    The matrix has one row and one column per dotted item. The list object
    is rebuilt in place, so calling this again gives a clean matrix instead
    of appending cells to rows that already exist.
    """
    global DFA
    size = len(doted_grammar)
    DFA[:] = [[''] * size for _ in range(size)]


# Connect states
def add_state(to, fro):
    """Copy the labelled (non-epsilon) NFA arcs of rows *to* and *fro* into DFA[to].

    Row *to* is copied first, so an arc from row *fro* in the same column
    overwrites it.
    """
    for col in range(len(doted_grammar)):
        for source_row in (to, fro):
            label = NFA[source_row][col]
            if label not in ('', '*'):
                DFA[to][col] = label


# Build the DFA
def make_dfa():
    """Group dotted items into states and fill the ``DFA`` transition matrix.

    Every item not yet flagged 'true' opens a state at its own index. The
    state holds that item plus every item it reaches through an epsilon arc
    in ``NFA``; those items are flagged 'true' so they do not open a state of
    their own. The labelled arcs of all member items are copied into the
    state's DFA row.
    """
    global NFA, doted_grammar, DFA_node
    init_dfa()
    size = len(doted_grammar)
    DFA_node[:] = [[""] * size for _ in range(size)]
    for i in range(size):
        if doted_grammar[i][1] != 'false':
            continue
        DFA_node[i][0] = doted_grammar[i][0]
        doted_grammar[i][1] = 'true'
        # Copy the opening item's own arcs even when it has no epsilon arc;
        # otherwise a state such as 'S->a.b' would have no shift on 'b'.
        add_state(i, i)
        k = 1
        for j in range(size):
            if NFA[i][j] == '*':  # epsilon arc
                DFA_node[i][k] = doted_grammar[j][0]
                k += 1
                doted_grammar[j][1] = 'true'
                add_state(i, j)


# Initialise the LR analysis table
def init_lr_table():
    """Reset ``action`` and ``goto`` to empty tables with one row per item.

    ``action`` rows hold one '' per terminal and ``goto`` rows hold one -1
    per non-terminal. Both lists are rebuilt in place, so calling this again
    gives clean tables instead of appending cells to rows that already exist.
    """
    global doted_grammar, action, goto
    rows = len(doted_grammar)
    action[:] = [[''] * len(VT) for _ in range(rows)]
    goto[:] = [[-1] * len(VN) for _ in range(rows)]


# Does the state contain a reduce item?
def need_protocol(point):
    """Return the first item of state *point* that ends with '.', else None.

    Every slot of the state's row is examined; unused slots are empty
    strings and never match.

    :param point: index of the state in ``DFA_node``.
    :return: the item string ending with '.', or None when there is none.
    """
    global DFA_node
    for item in DFA_node[point]:
        if item.endswith('.'):
            return item
    return None


# Find a production's number from its text
def find_grammar(string):
    """Return the index in ``grammar`` of *string* without its last character.

    Returns None when no production equals the shortened text.
    """
    global grammar
    wanted = string[:-1]
    for number, production in enumerate(grammar):
        if production == wanted:
            return number
    return None


# Fill the LR analysis table
def fill_lr_table():
    """Build the ``action`` and ``goto`` tables from ``DFA`` and ``DFA_node``.

    A state with a reduce item gets 'acc' in the '#' column when the item
    belongs to production 0, and otherwise 'r<n>' in every action column,
    where n is the production number. Any other state gets 's<j>' for each
    terminal arc and a goto entry j for each non-terminal arc, j being the
    target state.
    """
    global doted_grammar, VT2Int, VN2Int, VN
    init_lr_table()
    for i in range(len(doted_grammar)):
        reduce_item = need_protocol(i)
        if reduce_item:
            num = find_grammar(reduce_item)
            if num == 0:
                # Reducing by production 0 means acceptance; testing the
                # production number works for any length of production 0.
                action[i][VT2Int['#']] = 'acc'
            else:
                tmp = 'r' + str(num)
                for j in range(len(VT)):
                    action[i][j] = tmp
        else:
            for j, symbol in enumerate(DFA[i]):
                if symbol == '':
                    continue
                if symbol in VN:
                    goto[i][VN2Int.get(symbol, -1)] = j
                else:
                    action[i][VT2Int.get(symbol, -1)] = 's' + str(j)


# Print the LR analysis table
def print_lr_table():
    """Print the ACTION and GOTO tables, one row per state.

    The GOTO part omits the column of the first non-terminal in ``VN``.
    Unset goto entries (-1) are printed as a tab.
    """
    global VT, VN, doted_grammar, action, goto
    state_count = len(doted_grammar)
    rule = '-----' * state_count
    action_pad = '   ' * (len(VT) - 2)
    goto_pad = '   ' * (len(VN) - 2)

    # Header
    print('----LR分析表----')
    print('\t\t|\t' + action_pad + 'Action' + action_pad
          + '\t|\t' + goto_pad + 'GOTO' + goto_pad + '\t|')
    terminal_heads = ''.join('%3s\t' % symbol for symbol in VT)
    nonterminal_heads = ''.join('%3s\t' % symbol for symbol in VN[1:])
    print('\t\t\t' + terminal_heads + '\t|\t' + nonterminal_heads + '\t|')
    print(rule)

    # Body
    for state in range(state_count):
        action_cells = ''.join(
            '%4s' % action[state][col] for col in range(len(VT)))
        goto_cells = ''.join(
            '\t' if goto[state][col] == -1 else '%4s' % goto[state][col]
            for col in range(1, len(VN)))
        print(('%5d\t|\t' % state) + action_cells + '\t|\t' + goto_cells + '\t|')
    print(rule)


# Has the analysis finished?
def is_end():
    """Return True when only '#' is left to read and the stack is '#', start.

    The start symbol is the left-hand side of production 0 of the loaded
    grammar ('X' is used when no grammar is loaded). A stack with fewer than
    two symbols is never final.
    """
    if input_str[location:] != '#':
        return False
    # Use the grammar's own start symbol rather than a fixed letter, so the
    # check matches whatever grammar was read.
    start_symbol = grammar[0][0] if grammar and grammar[0] else 'X'
    return (len(symbol_stack) >= 2
            and symbol_stack[-1] == start_symbol
            and symbol_stack[-2] == '#')


# Output one trace row
def output():
    """Print the step number, both stacks and the unread input, without a newline.

    ``now_step`` is incremented after it has been formatted.
    """
    global now_step, status_stack, symbol_stack, input_str, now_state
    unread = input_str[location:]
    row = ''.join((
        '%d\t\t' % now_step,
        '%-20s' % status_stack,
        '%-25s' % symbol_stack,
        '%-22s' % unread,
    ))
    now_step += 1
    print(row, end='')


# Count the symbols on the right-hand side of a production
def count_right_num(grammar_i):
    """Return the length of *grammar_i* minus the 3 leading characters."""
    prefix_length = 3  # one-character left-hand side plus '->'
    return len(grammar_i) - prefix_length


# Run the shift-reduce analysis
def do_stipulations():
    """Analyse ``input_str`` with the action/goto tables and print each step.

    Starting with '#' on the symbol stack and state 0 on the state stack,
    the loop looks up the action for the top state and the current input
    character:

    * 's<n>'  - push the character and state n, advance the input;
    * 'r<n>'  - pop one state and one symbol per right-hand-side symbol of
      production n, push its left-hand side, then push the goto state;
    * 'acc'   - stop and print "acc:OK";
    * otherwise (empty entry, unknown character, missing goto, or input
      exhausted) - print the failure message and return without "acc:OK".
    """
    global status_stack, input_str, symbol_stack, location, now_state, input_ch
    print('----Anysis Process----')
    print("index\t\t", end='')
    print('%-17s' % 'Status', end='')
    print('%-22s' % 'Symbol', end='')
    print('%-20s' % 'Input', end='')
    print('Action')
    print('-----------' * len(doted_grammar))
    symbol_stack.append('#')
    status_stack.append(0)
    while not is_end():
        if location >= len(input_str):
            # The input ran out before an accept was reached.
            print('****分析失败****')
            return
        now_state = status_stack[-1]
        input_ch = input_str[location]
        output()
        column = VT2Int.get(input_ch)
        # A character that is not a terminal has no action entry.
        entry = '' if column is None else action[now_state][column]
        if entry.startswith('s'):
            # Parse everything after the letter so states >= 10 work.
            target = int(entry[1:])
            symbol_stack.append(input_ch)
            status_stack.append(target)
            location += 1
            print('action[%s][%s]=s%s,即状态=%s入栈' % (now_state, input_ch, target, target))
        elif entry.startswith('r'):
            num = int(entry[1:])
            g = grammar[num]
            for _ in range(count_right_num(g)):
                status_stack.pop()
                symbol_stack.pop()
            symbol_stack.append(g[0])
            now_state = status_stack[-1]
            next_state = goto[now_state][VN2Int.get(symbol_stack[-1], -1)]
            if next_state == -1:
                print('****分析失败****')
                return
            status_stack.append(next_state)
            print('r%s:用%s规约,且GOTO(%s,%s)=%s入栈' % (num, g, status_stack[-2], symbol_stack[-1], next_state))
        elif entry == 'acc':
            break
        else:
            print('****分析失败****')
            return
    print("acc:OK")


if __name__ == '__main__':
    # Read the grammar and generate its dotted items
    read_grammar('src.txt')
    add_dot()
    print_read_grammar()
    print_doted_grammar()
    find_term_non()
    # Build the NFA
    make_nfa()
    # Build the DFA
    make_dfa()
    # Build and print the analysis table
    fill_lr_table()
    print_lr_table()
    # Analyse a sample input string that ends with '#'
    input_str = 'abab#'
    do_stipulations()

实验结果

读入的文法和加上点后的文法:
LR(0) 文法分析器_第1张图片

LR(0)分析表:
LR(0) 文法分析器_第2张图片

分析过程:
LR(0) 文法分析器_第3张图片

你可能感兴趣的:(LR(0) 文法分析器)