Tesseract:主要用于OCR文字识别
PyQt:主要用于界面设计,以及将各项功能与界面控件进行连接
主要界面:
功能实现界面:
# -*- coding: utf-8 -*-
import os
import sys
import pytesseract
from PyQt5 import QtCore, QtGui, QtWidgets
from PyQt5.QtCore import Qt
from PyQt5.QtGui import QImage, QPixmap
from PyQt5.QtWidgets import *
from PIL import Image, ImageGrab
import numpy as np
import pyperclip
import time
class Ui_MainWindow(object):
    """UI layout and OCR back-end logic for the "中文OCR" main window.

    The layout code in ``setupUi`` / ``retranslateUi`` follows the output of
    Qt Designer + ``pyuic5``; the remaining methods are the button handlers
    (load image, grab clipboard screenshot, run OCR, copy text, save image).
    """

    def __init__(self):
        # Path of the image file most recently chosen in the file dialog.
        self.file_name = ""
        # Pixmap of the image currently loaded (used for display and saving).
        self.Pixmap = QPixmap()
        # NumPy array handed to the OCR step; stays 0 until an image is loaded.
        self.ocr_img = 0

    # ------------- UI layout section (designed with Qt Designer) -------------
    # Converted with the pyuic5 tool.
    # In a terminal run: pyuic5 -o my_ui.py my_ui.ui
    # The generated .py file is written next to the .ui file.
    def setupUi(self, MainWindow):
        """Build all widgets on *MainWindow* and connect the button signals."""
        MainWindow.setObjectName("MainWindow")
        MainWindow.resize(1170, 812)
        self.centralwidget = QtWidgets.QWidget(MainWindow)
        self.centralwidget.setObjectName("centralwidget")

        # Left side: label used as the image preview area.
        self.image = QtWidgets.QLabel(self.centralwidget)
        self.image.setGeometry(QtCore.QRect(10, 10, 740, 791))
        self.image.setMinimumSize(QtCore.QSize(500, 600))
        self.image.setStyleSheet("background-color: rgb(150, 150, 150);")
        self.image.setText("")
        self.image.setObjectName("image")

        # Right side: container widget with a grid layout for the controls.
        self.widget = QtWidgets.QWidget(self.centralwidget)
        self.widget.setGeometry(QtCore.QRect(760, 8, 402, 791))
        self.widget.setObjectName("widget")
        self.gridLayout = QtWidgets.QGridLayout(self.widget)
        self.gridLayout.setContentsMargins(0, 0, 0, 0)
        self.gridLayout.setObjectName("gridLayout")

        # Two hint labels at the top of the control column.
        self.Tip_label = QtWidgets.QLabel(self.widget)
        self.Tip_label.setMaximumSize(QtCore.QSize(400, 16))
        self.Tip_label.setAlignment(QtCore.Qt.AlignLeading | QtCore.Qt.AlignLeft | QtCore.Qt.AlignTop)
        self.Tip_label.setObjectName("Tip_label")
        self.gridLayout.addWidget(self.Tip_label, 0, 0, 1, 2)
        self.Tip_label_2 = QtWidgets.QLabel(self.widget)
        self.Tip_label_2.setMaximumSize(QtCore.QSize(400, 16))
        self.Tip_label_2.setAlignment(QtCore.Qt.AlignLeading | QtCore.Qt.AlignLeft | QtCore.Qt.AlignTop)
        self.Tip_label_2.setObjectName("Tip_label_2")
        self.gridLayout.addWidget(self.Tip_label_2, 1, 0, 1, 2)

        # "Read image file" button.
        self.read_file = QtWidgets.QPushButton(self.widget)
        self.read_file.setMinimumSize(QtCore.QSize(151, 31))
        font = QtGui.QFont()
        font.setPointSize(11)
        self.read_file.setFont(font)
        self.read_file.setStyleSheet("background-color: rgb(52, 52, 52);\n"
                                     "color: rgb(255, 255, 255);")
        self.read_file.setObjectName("read_file")
        self.gridLayout.addWidget(self.read_file, 2, 0, 1, 1)

        # "Read screenshot" button.
        self.read_scr = QtWidgets.QPushButton(self.widget)
        self.read_scr.setMinimumSize(QtCore.QSize(151, 31))
        font = QtGui.QFont()
        font.setPointSize(11)
        self.read_scr.setFont(font)
        self.read_scr.setStyleSheet("background-color: rgb(52, 52, 52);\n"
                                    "color: rgb(255, 255, 255);")
        self.read_scr.setObjectName("read_scr")
        self.gridLayout.addWidget(self.read_scr, 2, 1, 1, 1)

        # "Start recognition" button.
        self.ocr_start = QtWidgets.QPushButton(self.widget)
        self.ocr_start.setMinimumSize(QtCore.QSize(400, 31))
        font = QtGui.QFont()
        font.setPointSize(11)
        self.ocr_start.setFont(font)
        self.ocr_start.setStyleSheet("background-color: rgb(140, 132, 198);")
        self.ocr_start.setObjectName("ocr_start")
        self.gridLayout.addWidget(self.ocr_start, 3, 0, 1, 2)

        # Text box that receives the recognised text.
        self.ocr_result = QtWidgets.QTextEdit(self.widget)
        self.ocr_result.setMinimumSize(QtCore.QSize(300, 300))
        self.ocr_result.setObjectName("ocr_result")
        self.gridLayout.addWidget(self.ocr_result, 4, 0, 1, 2)

        # "Save screenshot" button.
        self.save_image = QtWidgets.QPushButton(self.widget)
        self.save_image.setMinimumSize(QtCore.QSize(100, 31))
        font = QtGui.QFont()
        font.setFamily("黑体")
        font.setPointSize(10)
        self.save_image.setFont(font)
        self.save_image.setStyleSheet("background-color: rgb(96, 141, 110);")
        self.save_image.setObjectName("save_image")
        self.gridLayout.addWidget(self.save_image, 5, 0, 1, 1)

        # "Copy" button.
        self.copy = QtWidgets.QPushButton(self.widget)
        self.copy.setMinimumSize(QtCore.QSize(100, 31))
        font = QtGui.QFont()
        font.setFamily("黑体")
        font.setPointSize(10)
        self.copy.setFont(font)
        self.copy.setStyleSheet("background-color: rgb(96, 141, 110);")
        self.copy.setObjectName("copy")
        self.gridLayout.addWidget(self.copy, 5, 1, 1, 1)

        MainWindow.setCentralWidget(self.centralwidget)
        self.retranslateUi(MainWindow)
        QtCore.QMetaObject.connectSlotsByName(MainWindow)

        # Wire each button to its handler.
        self.read_scr.clicked.connect(self.read_cap)
        self.read_file.clicked.connect(self.read_img)
        self.copy.clicked.connect(self.copy_result)
        self.save_image.clicked.connect(self.save_file)
        self.ocr_start.clicked.connect(self.start_OCR)

    def retranslateUi(self, MainWindow):
        """Set the window title and all user-visible widget texts."""
        _translate = QtCore.QCoreApplication.translate
        MainWindow.setWindowTitle(_translate("MainWindow", "中文OCR"))
        self.Tip_label.setText(_translate("MainWindow", " 提示:window截屏(win键 + Shift + s 或PrtSc键)"))
        # The hint names *.png because save_file() writes ".png" files.
        self.Tip_label_2.setText(_translate("MainWindow", " 提示:保存路径(C:\\image_save\\*.png)"))
        self.read_file.setText(_translate("MainWindow", "读取图像文件"))
        self.read_scr.setText(_translate("MainWindow", "读取截屏"))
        self.ocr_start.setText(_translate("MainWindow", "开始识别"))
        self.save_image.setText(_translate("MainWindow", "保存截图"))
        self.copy.setText(_translate("MainWindow", "一键复制"))

    # ------------- end of the UI layout section -------------
    # ------------- back-end handlers -------------

    def _show_pixmap(self):
        """Display ``self.Pixmap`` in the preview label, scaled to fit it."""
        scaled = self.Pixmap.scaled(self.image.size(), Qt.KeepAspectRatio)
        self.image.setPixmap(scaled)

    # ----- Read a screenshot from the clipboard -----
    def read_cap(self):
        """Load the image currently on the clipboard and show it.

        Shows an error dialog and leaves the current state untouched when the
        clipboard does not hold image data.
        """
        scr = ImageGrab.grabclipboard()
        # Anything that is not a PIL image (e.g. None) means there is no
        # screenshot on the clipboard to read.
        if not isinstance(scr, Image.Image):
            QtWidgets.QMessageBox.critical(None, "错误", "请先进行截屏")
            return
        # Force RGBA so the buffer layout matches QImage.Format_RGBA8888
        # regardless of the mode the clipboard image arrived in.
        self.ocr_img = np.array(scr.convert("RGBA"))
        height, width = self.ocr_img.shape[:2]
        label_image = QImage(self.ocr_img.data, width, height, 4 * width,
                             QImage.Format_RGBA8888)
        self.Pixmap = QPixmap.fromImage(label_image)
        self._show_pixmap()

    # ----- Read an existing image file -----
    def read_img(self):
        """Let the user pick an image file, load it and show it.

        Does nothing when the dialog is cancelled; shows an error dialog when
        the chosen file cannot be loaded as an image.
        """
        self.file_name, _ = QtWidgets.QFileDialog.getOpenFileName(
            None, "选择图片", "C:\\",
            "Image (*.jpg *.gif *.png *.jpeg *.pgm *.pbm *.ppm *.xpm);; "
            "JPEG Files(*.jpg);;PNG Files(*.png);;PGM Files(*.pgm)")
        if not self.file_name:
            return
        pixmap = QPixmap(self.file_name)
        if pixmap.isNull():
            QtWidgets.QMessageBox.critical(None, "错误", "无法读取所选图像文件")
            return
        self.Pixmap = pixmap
        self.ocr_img = pixmap_to_cvimg(self.Pixmap)
        self._show_pixmap()

    # ----- Run OCR on the loaded image -----
    def start_OCR(self):
        """Recognise the text in the loaded image and show it in the text box."""
        if not isinstance(self.ocr_img, np.ndarray):
            # No image has been loaded yet (ocr_img still holds its initial 0).
            QtWidgets.QMessageBox.critical(None, "错误", "请先读取图像或截屏")
            return
        # Recognise on a grayscale version of the image; further preprocessing
        # tailored to the use case can improve accuracy.
        img = Image.fromarray(self.ocr_img).convert('L')
        # lang='chi_sim' selects Simplified Chinese recognition.
        text = pytesseract.image_to_string(img, lang='chi_sim')
        self.ocr_result.setText(text.replace("\n\n", "\n"))

    # ----- Copy the recognised text to the clipboard -----
    def copy_result(self):
        """Copy the contents of the result text box to the clipboard."""
        pyperclip.copy(self.ocr_result.toPlainText())

    # ----- Save the current image as a PNG named after the current time -----
    def save_file(self):
        """Save ``self.Pixmap`` to ``C:/image_save/<timestamp>.png``.

        A message box reports whether the image was actually written.
        """
        if self.Pixmap.isNull():
            QtWidgets.QMessageBox(QtWidgets.QMessageBox.Warning, "错误", "没有可保存的图像!").exec_()
            return
        now = time.strftime("%Y-%m-%d-%H_%M_%S", time.localtime())
        path = "C:/image_save/"
        # Create the target folder if it does not exist yet.
        os.makedirs(path, exist_ok=True)
        file_path = path + now + ".png"
        # QPixmap.save() returns False on failure, so report the real outcome.
        if self.Pixmap.save(file_path):
            msg_box = QtWidgets.QMessageBox(QtWidgets.QMessageBox.Information, "OK", "图像已保存!")
        else:
            msg_box = QtWidgets.QMessageBox(QtWidgets.QMessageBox.Warning, "错误", "图像保存失败!")
        msg_box.exec_()
# ----- Convert a QPixmap to a NumPy image array (adapted from an online snippet) -----
def pixmap_to_cvimg(qt_pixmap):
    """Convert a QPixmap into a ``(height, width, 3)`` uint8 NumPy array.

    Args:
        qt_pixmap: The QPixmap to convert.

    Returns:
        A NumPy array holding the first three bytes of every 32-bit pixel.
        NOTE(review): for Format_RGB32 on little-endian hosts this should be
        B, G, R order (OpenCV-style) — confirm if channel order matters.

    Raises:
        ValueError: If *qt_pixmap* is null (it has no pixel data to convert).
    """
    if qt_pixmap.isNull():
        raise ValueError("cannot convert a null QPixmap to an image array")
    # Normalise to a 32-bit format so every pixel occupies exactly 4 bytes,
    # which is what the reshape below relies on.
    qimg = qt_pixmap.toImage().convertToFormat(QImage.Format_RGB32)
    ptr = qimg.bits()
    ptr.setsize(qimg.byteCount())
    pixels_per_row = qimg.bytesPerLine() // 4
    result = np.array(ptr, dtype=np.uint8).reshape(qimg.height(), pixels_per_row, 4)
    # Drop any row padding and the fourth (alpha/filler) byte.
    return result[:, :qimg.width(), :3]
def main():
    """Create the Qt application, show the OCR main window and run the event loop."""
    # Every PyQt program needs exactly one QApplication instance.
    app = QApplication(sys.argv)

    # Build the top-level window and let Ui_MainWindow populate it.
    main_window = QMainWindow()
    ui = Ui_MainWindow()
    ui.setupUi(main_window)
    main_window.show()

    # Block in the event loop, then propagate its return code to the shell.
    exit_code = app.exec_()
    sys.exit(exit_code)


if __name__ == '__main__':
    main()
备注:本文主要用于学习记录,整体功能可以实现;如需提高识别准确度,可根据待识别图像的色彩类型先进行相应的预处理。部分内容参考了其他作者的实现,但因忘记其来源,故未能标注出处。