最近在学习编译原理。由于实验要求有词法分析器,这里我就先记录一下词法分析器实现过程以及具体思路。
此处我选择的目标语言是c语言的子集来进行词法分析。
此处我选用的实现语言是 Python,主要是考虑到 Python 的数据结构比较强大,而且包容性强;并且我对 PyQt 比较熟练,很容易设计出 GUI 界面。关于 PyQt 的资料网上比较少,对初学者不是很友好,后面我会出一些关于 PyQt 的教程,还望持续关注!
词法分析不必要设计成单独的一遍,我认为词法分析器应该设计成一个子程序,每当语法分析需要一个单词符号时,那么此时向词法分析器传递一个输入串,词法分析器便要能分析出这个输入串中的单词。
对于单词的分析,关键在于第一个字符的性质:第一个字符决定了后续的分析流程。如果第一个字符是数字,那么接下来就要判断这个单词是否为合法的常量;此后读到的字符中,除了数字、小数点以及表示指数的 e 或 E 之外,遇到运算符、分隔符或空白则常量结束,遇到其他字符则可以直接判定该单词非法,记为 ERROR。因此可以把这部分逻辑单独分离成一个函数,这里我取名为 IsDigits() 函数。标识符的判定以及算术或逻辑运算符的判定,也可以按照同样的思路分离出相应的函数。
函数表
# Lookup tables used by the scanner (these are instance attributes set up in the constructor).
# Words that are reported as keywords rather than identifiers.
self.reserveWord = ["auto", "break", "case", "char", "const", "continue",
"default", "do", "double", "else", "enum", "extern",
"float", "for", "goto", "if", "int", "long",
"register", "return", "short", "signed", "sizeof", "static",
"struct", "switch", "typedef", "union", "unsigned", "void",
"volatile", "while"] # keywords (the original note calls them C++ keywords)
# Every character / character pair that may end a number or an identifier.
self.operatorOrDelimiter = ["+", "-", "*", "/", "<", "<=", ">", ">=", "=", "==",
"!=", ";", "(", ")", "^", ",", "\"", "\'", "#", "&",
"&&", "|", "||", "%", "~", "<<", ">>", "[", "]", "{",
"}", "\\", ".", ":", "!"] # assorted symbols (the original note calls them C++ symbols)
# Single-character delimiters.
self.Delimiter = [";", "(", ")", ",", "#", "[", "]", "{", "}", "\\"]
# Operators reported as relational; note that "=" and the bitwise operators are listed here too.
self.RelationOperation = ["<", "<=", ">", ">=", "=", "==", "!=", "^", "&", "&&", "|", "||", "<<", ">>", "!"]
# Operators reported as arithmetic; the two-character forms are not part of operatorOrDelimiter.
self.Operator = ["+", "-", "*", "/", "%", "~", "+=", "*=", "/=", "-=",
"++", "--"]
# 判断数字(常量)的代码
def IsDigits(self, inString, pos):
    """Consume a numeric constant from the start of ``inString``.

    Accepted characters are appended to ``self.token``: digits, at most one
    '.' (only before the exponent), at most one exponent marker 'e'/'E', and
    an optional '+'/'-' directly after the exponent marker.

    :param inString: text whose leading characters are examined
    :param pos: counter that is advanced once per examined character
    :return: ``(flag, pos)``.  ``flag`` is True when scanning ended at an
        operator/delimiter, a space, a newline or the end of the input and
        the exponent (if any) has at least one digit.  ``pos`` is the
        incoming value plus one plus the number of consumed characters, so
        for ``pos == 0`` the unscanned remainder is ``inString[pos - 1:]``.
    """
    flag = False
    incomplete = False  # True while the token ends in e/E or an exponent sign
    for ch in inString:
        pos += 1
        has_exponent = 'e' in self.token or 'E' in self.token
        if ch.isdigit():
            self.token += ch
            flag = True
            incomplete = False
        elif ch == '.' and '.' not in self.token and not has_exponent:
            self.token += ch
        elif (ch == 'e' or ch == 'E') and not has_exponent:
            # Parenthesised on purpose: without the parentheses `and` binds
            # tighter than `or` and every further 'e' would be accepted.
            self.token += ch
            incomplete = True
        elif (ch == '+' or ch == '-') and self.token[-1:] in ('e', 'E'):
            self.token += ch
        else:
            flag = ch in self.operatorOrDelimiter or ch == ' ' or ch == '\n'
            return flag and not incomplete, pos
    # Whole input consumed: step one past the end so that the caller's
    # ``inString[pos - 1:]`` remainder is empty instead of the last digit.
    return flag and not incomplete, pos + 1
# Branch of scan() for input that starts with a digit: try to read a numeric constant.
if inString[0].isdigit():
judge, index = self.IsDigits(inString, 0)
if judge:
# The string literal below is disabled code that converted the lexeme into a number;
# as a bare string expression it has no effect at run time.
"""if '.' in self.token: # 此处是对常量的转化过程此处写成注释
print("--------")
print(float(self.token))
print("--------")
elif 'e' not in self.token and 'E' not in self.token:
print("--------")
print(int(self.token))
print("--------")
else:
num1 = 0
num2 = 0
if 'E' in self.token:
l = self.token.split('E')
if '.' in l[0]:
num1 = float(l[0])
else:
num1 = int(l[0])
num2 = int(l[1])
elif 'e' in self.token:
l = self.token.split('e')
if '.' in l[0]:
num1 = float(l[0])
else:
num1 = int(l[0])
num2 = int(l[1])
for i in range(0, num2):
num1 *= 10
print("--------")
print(num1)
print("--------")"""
# Record the lexeme as a constant ("常数").
self.result.append([self.token, "常数", (row, col)])
else:
# IsDigits rejected the lexeme: record what was read so far as an error.
self.result.append([self.token, "ERROR", (row, col)])
# Continue with the text from position index - 1 onwards.
if index - 1 < len(inString) and index - 1 > 0:
if index == len(inString):
# NOTE(review): this treats index == len(inString) as "the last character is an
# unconsumed terminator"; verify IsDigits never returns len(inString) after
# consuming every character, otherwise the last digit is scanned a second time.
print(inString[len(inString) - 1])
self.scan(inString[len(inString) - 1], row, col)
else:
self.scan(inString[index - 1:], row, col)
def isReserve(self, target):
    """Return True when ``target`` is listed in ``self.reserveWord``, else False."""
    return target in self.reserveWord
def isMark(self, inString, pos):
    """Collect identifier characters from ``inString`` into ``self.token``.

    Letters, digits and '_' are appended to ``self.token``; scanning stops
    at the first character found in ``self.operatorOrDelimiter``.  Any other
    character is not appended and scanning continues past it.

    :param inString: text whose leading characters are examined
    :param pos: counter that is advanced once per examined character
    :return: ``(flag, pos)``.  ``flag`` is True when scanning stopped at an
        operator/delimiter or the last examined character was an identifier
        character.  When scanning stops at an operator/delimiter and
        ``pos`` started at 0, ``inString[pos - 1:]`` begins at that
        character; when the whole input was examined ``pos`` ends one past
        ``len(inString)`` so that the same slice is empty.
    """
    flag = False
    for ch in inString:
        pos += 1
        if ch.isalpha() or ch.isdigit() or ch == '_':
            self.token += ch
            flag = True
        elif ch in self.operatorOrDelimiter:  # operator / delimiter ends the identifier
            return True, pos
        else:
            flag = False
    # Nothing is left over: step one past the end so the caller does not
    # scan the final character (e.g. the "1" of "a1") a second time.
    return flag, pos + 1
# Branch for input that starts with a letter: keyword or identifier.
elif inString[0].isalpha():
judge, index = self.isMark(inString, 0)
if self.isReserve(self.token):
self.result.append([self.token, "关键字", (row, col)])
else:
self.result.append([self.token, "标识符", (row, col)])
# Continue from position index - 1 unless the character there is a letter.
# NOTE(review): if isMark can return len(inString) after consuming the whole input,
# a trailing digit or '_' is scanned again here - confirm against isMark.
if index <= len(inString) and not inString[index - 1].isalpha():
self.scan(inString[index - 1:], row, col)
# Branch for a character constant, introduced by a single quote.
elif inString[0] == '\'':
judge, index = self.isChar(inString, 0)
if judge:
self.result.append([self.token, "字符常量", (row, col)])
if index < len(inString) and index - 1 >= 0:
self.scan(inString[index - 1:], row, col)
# Branch for a string constant, introduced by a double quote.
elif inString[0] == '\"':
judge, index = self.isString(inString, 0)
index = index + 1 # the closing quote must not be counted
if judge:
self.result.append([self.token, "字符串常量", (row, col)])
if index < len(inString) and index - 1 >= 0:
self.scan(inString[index - 1:], row, col)
# Branch for input that starts with an arithmetic or relational operator character.
elif inString[0] in self.Operator or inString[0] in self.RelationOperation:
index = self.IsOperator(inString, 0)
# Classify the collected lexeme; anything in neither table is an error.
if self.token in self.Operator:
self.result.append([self.token, "算术运算符", (row, col)])
elif self.token in self.RelationOperation:
self.result.append([self.token, "关系运算符", (row, col)])
else:
self.result.append([self.token, "ERROR", (row, col)])
# Continue with the text from position index - 1 onwards.
if index <= len(inString) and index - 1 >= 0:
self.scan(inString[index - 1:], row, col)
def IsOperator(self, inString, pos):
    """Consume the longest operator at the start of ``inString``.

    The lexeme is extended one character at a time for as long as the
    result is still an entry of ``self.operatorOrDelimiter``,
    ``self.Operator`` or ``self.RelationOperation``, so "++;" yields "++"
    and leaves ";" for the caller instead of producing the unknown lexeme
    "++;".  The accepted characters are appended to ``self.token``.

    :param inString: text whose leading characters are examined
    :param pos: counter that is advanced once per examined character
    :return: for a one-character input, ``pos`` unchanged; otherwise the
        incoming ``pos`` plus one plus the number of consumed characters,
        so for ``pos == 0`` the remainder is ``inString[pos - 1:]``.
    """
    if not inString:
        return pos
    if len(inString) == 1:
        self.token += str(inString[0])
        return pos
    known = set(self.operatorOrDelimiter) | set(self.Operator) | set(self.RelationOperation)
    lexeme = ""
    for ch in inString:
        pos += 1
        if lexeme + ch in known:
            lexeme += ch
            self.token += ch
        else:
            return pos
    # Whole input consumed: step one past the end so the caller does not
    # scan the last operator character again.
    return pos + 1
# Branch for input that starts with an arithmetic or relational operator character.
elif inString[0] in self.Operator or inString[0] in self.RelationOperation:
index = self.IsOperator(inString, 0)
# Classify the collected lexeme; anything in neither table is an error.
if self.token in self.Operator:
self.result.append([self.token, "算术运算符", (row, col)])
elif self.token in self.RelationOperation:
self.result.append([self.token, "关系运算符", (row, col)])
else:
self.result.append([self.token, "ERROR", (row, col)])
# Continue with the text from position index - 1 onwards.
if index <= len(inString) and index - 1 >= 0:
self.scan(inString[index - 1:], row, col)
运行测试图片
我加入了GUI界面并对其进行了代码优化,优化的代码在上面已经进行了更新。下面贴出整体GUI部分代码
# -*- coding: utf-8 -*-
# Form implementation generated from reading ui file 'analysis.ui'
#
# Created by: PyQt5 UI code generator 5.15.4
#
# WARNING: Any manual changes made to this file will be lost when pyuic5 is
# run again. Do not edit this file unless you know what you are doing.
from PyQt5 import QtCore, QtGui, QtWidgets
# Widget layout for the analyser window; this class is pyuic5 output generated from 'analysis.ui'.
class Ui_MainWindow(object):
# Create all child widgets on MainWindow and install them.
def setupUi(self, MainWindow):
MainWindow.setObjectName("MainWindow")
MainWindow.resize(961, 816)
self.centralwidget = QtWidgets.QWidget(MainWindow)
self.centralwidget.setObjectName("centralwidget")
# Text box "code": its text is read by the main window when the first button is clicked.
self.code = QtWidgets.QTextEdit(self.centralwidget)
self.code.setGeometry(QtCore.QRect(10, 10, 781, 391))
self.code.setObjectName("code")
# Text box "result": the main window writes the token table here.
self.result = QtWidgets.QTextEdit(self.centralwidget)
self.result.setGeometry(QtCore.QRect(0, 410, 951, 361))
self.result.setObjectName("result")
# Container with a grid layout that holds the two push buttons.
self.gridLayoutWidget = QtWidgets.QWidget(self.centralwidget)
self.gridLayoutWidget.setGeometry(QtCore.QRect(800, 50, 161, 231))
self.gridLayoutWidget.setObjectName("gridLayoutWidget")
self.gridLayout = QtWidgets.QGridLayout(self.gridLayoutWidget)
self.gridLayout.setContentsMargins(0, 0, 0, 0)
self.gridLayout.setObjectName("gridLayout")
self.pushButton = QtWidgets.QPushButton(self.gridLayoutWidget)
self.pushButton.setObjectName("pushButton")
self.gridLayout.addWidget(self.pushButton, 0, 0, 1, 1)
self.pushButton_2 = QtWidgets.QPushButton(self.gridLayoutWidget)
self.pushButton_2.setObjectName("pushButton_2")
self.gridLayout.addWidget(self.pushButton_2, 1, 0, 1, 1)
MainWindow.setCentralWidget(self.centralwidget)
# Menu bar and status bar.
self.menubar = QtWidgets.QMenuBar(MainWindow)
self.menubar.setGeometry(QtCore.QRect(0, 0, 961, 26))
self.menubar.setObjectName("menubar")
MainWindow.setMenuBar(self.menubar)
self.statusbar = QtWidgets.QStatusBar(MainWindow)
self.statusbar.setObjectName("statusbar")
MainWindow.setStatusBar(self.statusbar)
self.retranslateUi(MainWindow)
# Auto-connect slots by name; the main window's on_<objectName>_clicked methods presumably
# rely on the object names set above - confirm against the window class.
QtCore.QMetaObject.connectSlotsByName(MainWindow)
# Set the window title and the button captions through Qt's translation function.
def retranslateUi(self, MainWindow):
_translate = QtCore.QCoreApplication.translate
MainWindow.setWindowTitle(_translate("MainWindow", "MainWindow"))
self.pushButton.setText(_translate("MainWindow", "词法分析"))
self.pushButton_2.setText(_translate("MainWindow", "重试"))
主控窗口
import sys
from PyQt5 import QtWidgets, QtCore
from analysis import Analysis
from MainWindow import Ui_MainWindow
class My_Window(QtWidgets.QMainWindow, Ui_MainWindow):
    """Main window that connects the generated UI to the lexical analyser."""

    def __init__(self):
        super(My_Window, self).__init__()
        self.setupUi(self)

    @QtCore.pyqtSlot()
    def on_pushButton_clicked(self):
        """Tokenise the text of the code editor and show the token table.

        Every line is cut at whitespace and each piece is handed to
        ``Analysis.scan`` with its 1-based line number and its 1-based
        position among the pieces of that line.
        """
        deal = Analysis()
        string = self.code.toPlainText()
        results = []
        for row, line in enumerate(string.split('\n'), start=1):
            # split() without an argument discards the empty pieces that
            # split(' ') produces for indentation, tabs and repeated
            # spaces; an empty piece holds no token to scan.
            for col, word in enumerate(line.split(), start=1):
                deal.scan(word, row, col)
            results.extend(deal.result)
            deal.result = []
        rows = ["单词 二元序列 类 型 位置(行,列)\n (单词种别,单词属性)\n"]
        for lexeme, category, position in results:
            pair = '(' + str(deal.dic[category]) + ',' + category + ')'
            rows.append('{:<10}{:<20}{:<20}{:<15}'.format(
                str(lexeme), pair, category, str(position)) + '\n')
        self.result.setText(''.join(rows))
if __name__ == '__main__':
    # Start the Qt application, show the main window and hand the event
    # loop's return value to the interpreter as exit status.
    application = QtWidgets.QApplication(sys.argv)
    window = My_Window()
    window.show()
    exit_code = application.exec()
    sys.exit(exit_code)
由于我们编译原理课程的实验还未验收,此处我先不放出完整代码,只把运行截图放在下方,希望大家可以对照参考,同时也欢迎私信我交流讨论。
如果您想要完整代码,欢迎私信我并说明用途,感谢您的支持!
我又重新更新完善了一下此处把代码贴出,欢迎大家进行测试,如果有什么需要改进的地方欢迎指出~~
import sys
from PyQt5 import QtWidgets, QtCore
from analysis import Analysis
from MainWindow import Ui_MainWindow
class My_Window(QtWidgets.QMainWindow, Ui_MainWindow):
    """Main window that connects the generated UI to the lexical analyser."""

    def __init__(self):
        super(My_Window, self).__init__()
        self.setupUi(self)

    @QtCore.pyqtSlot()
    def on_pushButton_clicked(self):
        """Tokenise the text of the code editor and show the token table.

        Every line is cut at whitespace and each piece is handed to
        ``Analysis.scan`` with its 1-based line number and its 1-based
        position among the pieces of that line.  The text is scanned
        exactly once: an additional whole-text scan before the per-line
        loop would add every token a second time to the first line.
        """
        deal = Analysis()
        string = self.code.toPlainText()
        results = []
        for row, line in enumerate(string.split('\n'), start=1):
            # split() without an argument discards the empty pieces that
            # split(' ') produces for indentation, tabs and repeated
            # spaces; an empty piece holds no token to scan.
            for col, word in enumerate(line.split(), start=1):
                deal.scan(word, row, col)
            results.extend(deal.result)
            deal.result = []
        rows = ["单词 二元序列 类 型 位置(行,列)\n (单词种别,单词属性)\n"]
        for lexeme, category, position in results:
            pair = '(' + str(deal.dic[category]) + ',' + category + ')'
            rows.append('{:<10}{:<20}{:<20}{:<15}'.format(
                str(lexeme), pair, category, str(position)) + '\n')
        self.result.setText(''.join(rows))

    @QtCore.pyqtSlot()
    def on_pushButton_2_clicked(self):
        """Clear both the result pane and the code editor."""
        self.result.clear()
        self.code.clear()
if __name__ == '__main__':
    # Start the Qt application, show the main window and hand the event
    # loop's return value to the interpreter as exit status.
    application = QtWidgets.QApplication(sys.argv)
    window = My_Window()
    window.show()
    exit_code = application.exec()
    sys.exit(exit_code)
# -*- coding: utf-8 -*-
# Form implementation generated from reading ui file 'analysis.ui'
#
# Created by: PyQt5 UI code generator 5.15.4
#
# WARNING: Any manual changes made to this file will be lost when pyuic5 is
# run again. Do not edit this file unless you know what you are doing.
from PyQt5 import QtCore, QtGui, QtWidgets
# Widget layout for the analyser window; this class is pyuic5 output generated from 'analysis.ui'.
class Ui_MainWindow(object):
# Create all child widgets on MainWindow and install them.
def setupUi(self, MainWindow):
MainWindow.setObjectName("MainWindow")
MainWindow.resize(961, 816)
self.centralwidget = QtWidgets.QWidget(MainWindow)
self.centralwidget.setObjectName("centralwidget")
# Text box "code": its text is read by the main window when the first button is clicked.
self.code = QtWidgets.QTextEdit(self.centralwidget)
self.code.setGeometry(QtCore.QRect(10, 10, 781, 391))
self.code.setObjectName("code")
# Text box "result": the main window writes the token table here.
self.result = QtWidgets.QTextEdit(self.centralwidget)
self.result.setGeometry(QtCore.QRect(0, 410, 951, 361))
self.result.setObjectName("result")
# Container with a grid layout that holds the two push buttons.
self.gridLayoutWidget = QtWidgets.QWidget(self.centralwidget)
self.gridLayoutWidget.setGeometry(QtCore.QRect(800, 50, 161, 231))
self.gridLayoutWidget.setObjectName("gridLayoutWidget")
self.gridLayout = QtWidgets.QGridLayout(self.gridLayoutWidget)
self.gridLayout.setContentsMargins(0, 0, 0, 0)
self.gridLayout.setObjectName("gridLayout")
self.pushButton = QtWidgets.QPushButton(self.gridLayoutWidget)
self.pushButton.setObjectName("pushButton")
self.gridLayout.addWidget(self.pushButton, 0, 0, 1, 1)
self.pushButton_2 = QtWidgets.QPushButton(self.gridLayoutWidget)
self.pushButton_2.setObjectName("pushButton_2")
self.gridLayout.addWidget(self.pushButton_2, 1, 0, 1, 1)
MainWindow.setCentralWidget(self.centralwidget)
# Menu bar and status bar.
self.menubar = QtWidgets.QMenuBar(MainWindow)
self.menubar.setGeometry(QtCore.QRect(0, 0, 961, 26))
self.menubar.setObjectName("menubar")
MainWindow.setMenuBar(self.menubar)
self.statusbar = QtWidgets.QStatusBar(MainWindow)
self.statusbar.setObjectName("statusbar")
MainWindow.setStatusBar(self.statusbar)
self.retranslateUi(MainWindow)
# Auto-connect slots by name; the main window's on_<objectName>_clicked methods presumably
# rely on the object names set above - confirm against the window class.
QtCore.QMetaObject.connectSlotsByName(MainWindow)
# Set the window title and the button captions through Qt's translation function.
def retranslateUi(self, MainWindow):
_translate = QtCore.QCoreApplication.translate
MainWindow.setWindowTitle(_translate("MainWindow", "MainWindow"))
self.pushButton.setText(_translate("MainWindow", "词法分析"))
self.pushButton_2.setText(_translate("MainWindow", "重试"))
"""
词法分析器的实现
"""
class Analysis:
    """Lexical analyser for a subset of C.

    ``scan`` cuts a piece of source text into tokens and appends one
    ``[lexeme, category, (row, col)]`` entry per token to ``self.result``;
    ``self.dic`` maps every category name to its numeric token code.  The
    recogniser methods accumulate the lexeme they accept in ``self.token``
    and return a position counter; ``scan`` itself only relies on the
    length of ``self.token`` to find the unscanned remainder.
    """

    def __init__(self):
        # Words reported as keywords rather than identifiers.
        self.reserveWord = ["auto", "break", "case", "char", "const", "continue",
                            "default", "do", "double", "else", "enum", "extern",
                            "float", "for", "goto", "if", "int", "long",
                            "register", "return", "short", "signed", "sizeof", "static",
                            "struct", "switch", "typedef", "union", "unsigned", "void",
                            "volatile", "while"]
        # Every character / character pair that may end a number or identifier.
        self.operatorOrDelimiter = ["+", "-", "*", "/", "<", "<=", ">", ">=", "=", "==",
                                    "!=", ";", "(", ")", "^", ",", "\"", "\'", "#", "&",
                                    "&&", "|", "||", "%", "~", "<<", ">>", "[", "]", "{",
                                    "}", "\\", ".", ":", "!"]
        self.Delimiter = [";", "(", ")", ",", "#", "[", "]", "{", "}", "\\"]
        self.RelationOperation = ["<", "<=", ">", ">=", "=", "==", "!=", "^", "&", "&&", "|", "||", "<<", ">>", "!"]
        self.Operator = ["+", "-", "*", "/", "%", "~", "+=", "*=", "/=", "-=",
                         "++", "--"]
        self.token = ""  # lexeme currently being collected
        self.result = []  # collected [lexeme, category, (row, col)] entries
        self.dic = {'标识符': 2, '关键字': 1, "常数": 3, "算术运算符": 4, "关系运算符": 5, "字符串常量": 6,
                    "字符常量": 7, "分界符": 8, "ERROR": 9}

    def isReserve(self, target):
        """Return True when ``target`` is a reserved word."""
        return target in self.reserveWord

    def isMark(self, inString, pos):
        """Collect the leading identifier characters into ``self.token``.

        Letters, digits and '_' are consumed; scanning stops at the first
        other character.

        :return: ``(flag, pos)``.  ``flag`` is True when scanning stopped at
            an operator/delimiter, at whitespace or at the end of a
            non-empty input.  ``pos`` is the incoming value plus one plus
            the number of consumed characters.
        """
        if not inString:
            return False, pos
        for ch in inString:
            pos += 1
            if ch.isalpha() or ch.isdigit() or ch == '_':
                self.token += ch
            else:
                return ch in self.operatorOrDelimiter or ch.isspace(), pos
        return True, pos + 1

    def IsDigits(self, inString, pos):
        """Collect a numeric constant into ``self.token``.

        Accepts digits, at most one '.' (before the exponent), at most one
        exponent marker 'e'/'E' and an optional sign directly after it.

        :return: ``(flag, pos)``.  ``flag`` is True when scanning ended at
            an operator/delimiter, a space, a newline or the end of the
            input and the exponent (if any) has at least one digit.
            ``pos`` is the incoming value plus one plus the number of
            consumed characters.
        """
        flag = False
        incomplete = False  # True while the token ends in e/E or an exponent sign
        for ch in inString:
            pos += 1
            has_exponent = 'e' in self.token or 'E' in self.token
            if ch.isdigit():
                self.token += ch
                flag = True
                incomplete = False
            elif ch == '.' and '.' not in self.token and not has_exponent:
                self.token += ch
            elif (ch == 'e' or ch == 'E') and not has_exponent:
                # Parenthesised on purpose: `and` binds tighter than `or`.
                self.token += ch
                incomplete = True
            elif (ch == '+' or ch == '-') and self.token[-1:] in ('e', 'E'):
                self.token += ch
            else:
                flag = ch in self.operatorOrDelimiter or ch == ' ' or ch == '\n'
                return flag and not incomplete, pos
        return flag and not incomplete, pos + 1

    def isChar(self, inString, pos):
        """Collect a character constant into ``self.token``.

        Accepts ``'c'`` for any single character other than a quote or a
        backslash, and the two-character escape form ``'\\c'``.  A malformed
        constant is collected up to the next single quote on the same line,
        or as the lone opening quote when there is none.

        :return: ``(flag, pos)`` with ``pos`` advanced by the number of
            collected characters.
        """
        line = inString.split('\n', 1)[0]
        if len(line) >= 3 and line[1] not in ("'", "\\") and line[2] == "'":
            length, flag = 3, True
        elif len(line) >= 4 and line[1] == "\\" and line[3] == "'":
            length, flag = 4, True
        else:
            closing = line.find("'", 1)
            length = closing + 1 if closing != -1 else 1
            flag = False
        self.token += inString[:length]
        return flag, pos + length

    def isString(self, inString, pos):
        """Collect a string constant into ``self.token``.

        Any characters may appear between the quotes; a backslash escapes
        the character after it.  An unterminated string is collected up to
        the end of the line.

        :return: ``(True, pos + index_of_closing_quote)`` on success,
            otherwise ``(False, pos + number_of_collected_characters)``.
        """
        idx = 1
        size = len(inString)
        while idx < size:
            ch = inString[idx]
            if ch == "\\" and idx + 1 < size:
                idx += 2  # skip the escaped character as well
                continue
            if ch == '"':
                self.token += inString[:idx + 1]
                return True, pos + idx
            if ch == '\n':
                break
            idx += 1
        idx = min(idx, size)
        self.token += inString[:idx]
        return False, pos + idx

    def IsOperator(self, inString, pos):
        """Collect the longest operator at the start of ``inString``.

        The lexeme grows one character at a time for as long as it is still
        an entry of one of the operator/delimiter tables, so "++;" yields
        "++" and leaves ";" unscanned.

        :return: for a one-character input, ``pos`` unchanged; otherwise the
            incoming ``pos`` plus one plus the number of consumed characters.
        """
        if not inString:
            return pos
        if len(inString) == 1:
            self.token += str(inString[0])
            return pos
        known = set(self.operatorOrDelimiter) | set(self.Operator) | set(self.RelationOperation)
        lexeme = ""
        for ch in inString:
            pos += 1
            if lexeme + ch in known:
                lexeme += ch
                self.token += ch
            else:
                return pos
        return pos + 1

    def scan(self, inString, row, col):
        """Split ``inString`` into tokens and append them to ``self.result``.

        :param inString: text to analyse; surrounding whitespace is ignored
        :param row: row recorded for every token found in this call
        :param col: column recorded for every token found in this call

        Empty input is ignored.  The text is processed in a loop (one
        iteration per token) instead of one recursive call per token, so
        long inputs cannot exhaust the recursion limit.  A character that
        starts no known token is recorded as a one-character "ERROR" and
        scanning continues after it.
        """
        text = str(inString).strip()
        while text:
            self.token = ""
            first = text[0]
            if first.isdigit():
                judge, _ = self.IsDigits(text, 0)
                category = "常数" if judge else "ERROR"
            elif first.isalpha() or first == '_':
                self.isMark(text, 0)
                category = "关键字" if self.isReserve(self.token) else "标识符"
            elif first == '\'':
                judge, _ = self.isChar(text, 0)
                category = "字符常量" if judge else "ERROR"
            elif first == '\"':
                judge, _ = self.isString(text, 0)
                category = "字符串常量" if judge else "ERROR"
            elif first in self.Operator or first in self.RelationOperation:
                self.IsOperator(text, 0)
                if self.token in self.Operator:
                    category = "算术运算符"
                elif self.token in self.RelationOperation:
                    category = "关系运算符"
                else:
                    category = "ERROR"
            elif first in self.Delimiter:
                # A delimiter is always a single character, so neighbouring
                # delimiters such as "()" are separate tokens.
                self.token = first
                category = "分界符"
            else:
                self.token = first
                category = "ERROR"
            self.result.append([self.token, category, (row, col)])
            # Every recogniser collects a prefix of ``text``; always advance
            # by at least one character so the loop terminates.
            text = text[max(len(self.token), 1):].strip()
if __name__ == "__main__":
    # Command-line driver: read one line of code, scan each
    # whitespace-separated piece and print the collected tokens.
    analysis = Analysis()
    # split() without an argument drops the empty pieces that split(' ')
    # yields for an empty line or for repeated spaces; an empty piece holds
    # no token to scan.
    for col, s in enumerate(input("请输入代码:").split(), start=1):
        analysis.scan(s, 1, col)
    for res in analysis.result:
        print(res)