图片,PDF转换成文字

图片,PDF转换成文字小工具

本想实现微信小程序“传图识字”的功能,后面又添加了pdf文件转文字。

知识点

  • 百度OCR文字识别
from aip import AipOcr

# Baidu OCR credentials; left blank in this snippet, fill in your own values.
APP_ID = ''
API_KEY = ''
SECRET_KEY = ''
# OCR client built from the three credentials above.
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

# Read an image file.
def get_file_content(filePath):
    """Return the complete contents of the file at *filePath* as bytes."""
    with open(filePath, mode='rb') as handle:
        data = handle.read()
    return data
# Load the sample image from disk as raw bytes.
image = get_file_content(r'D:\Python\study\image\d2.png')

# Run general text recognition on the local image.
result = client.basicGeneral(image)
content = result.get('words_result')
if content is None:
    # A response without 'words_result' is not a recognition result (e.g. an
    # error reply); print it as-is instead of failing with KeyError.
    print(result)
else:
    # Each entry holds one recognised line of text under the 'words' key.
    for co in content:
        print(co['words'])


# """ 如果有可选参数 """
# options = {}
# options["language_type"] = "CHN_ENG"
# options["detect_direction"] = "true"
# options["detect_language"] = "true"
# options["probability"] = "true"
  • PDF 逐页转换成图片
import fitz
import glob

def read_pdf(filename, output_dir=r'D:\Python\study\image', zoom=100):
    """Render every page of a PDF to a numbered PNG file.

    :param filename: path of the PDF; a glob pattern is also accepted, in
        which case the first match is used
    :param output_dir: directory that receives ``1.png``, ``2.png``, ...;
        created if it does not exist
    :param zoom: scale factor in percent (100 renders at the page's own size);
        larger values give sharper images
    :raises FileNotFoundError: if *filename* matches no file
    :return: None
    """
    import os  # local import: this snippet only imports fitz and glob

    # Try the literal path first so that names containing glob metacharacters
    # such as "[1]" still resolve; fall back to pattern matching otherwise.
    if os.path.isfile(filename):
        pdffile = filename
    else:
        matches = glob.glob(filename)
        if not matches:
            raise FileNotFoundError(filename)
        pdffile = matches[0]

    os.makedirs(output_dir, exist_ok=True)

    # The transform is the same for every page, so build it once.
    # NOTE(review): pageCount / getPixmap / preRotate / writePNG are the
    # camelCase PyMuPDF names this snippet was written against - confirm they
    # exist in the installed PyMuPDF version.
    rotate = 0
    trans = fitz.Matrix(zoom / 100.0, zoom / 100.0).preRotate(rotate)

    doc = fitz.open(pdffile)
    try:
        for pg in range(doc.pageCount):
            # alpha=True keeps an alpha channel in the rendered image.
            pm = doc[pg].getPixmap(matrix=trans, alpha=True)
            pm.writePNG(os.path.join(output_dir, '%s.png' % (pg + 1)))
    finally:
        # Release the document handle even if rendering fails part-way.
        doc.close()

# Example run: render the pages of demo.pdf (path relative to the working directory).
read_pdf(r'demo.pdf')
  • TK 控件及布局
    视频教程
    python-tkinter使用方法
    python之tkinter使用-消息弹框

  • 复制到系统剪贴板

安装pyperclip模块:pip install pyperclip
pyperclip.copy(text) 把text字符串中的字符复制到剪切板
text = pyperclip.paste() 把剪切板上的字符串复制到text

  • 将py文件打包成 exe

切换到py文件目录
pyinstaller -F -i KEY.ico tkinterTest.py -w
-F :打包成单个exe文件
-w :关闭DOS控制台(运行时不弹出命令行窗口)
-i : 替换exe图标

  • 小工具截图
    图片,PDF转换成文字_第1张图片

小工具源码

from tkinter import *
import tkinter.filedialog
import datetime
from tkinter import messagebox
import filetype
import os
from aip import AipOcr
import fitz
import glob
from pathlib import Path
import time
import pyperclip

def copy():
    """Copy-button handler: put the text widget's contents on the clipboard.

    :return: None
    """
    # A Tk Text widget always ends with one newline of its own; 'end-1c'
    # stops just before it so the clipboard holds only the visible text.
    pyperclip.copy(text.get('1.0', 'end-1c'))

def filepath():
    """Browse-button handler: let the user pick the input path.

    Modes 1 (single image) and 2 (PDF) open a file chooser; any other mode
    (batch images) opens a directory chooser. The chosen path is stored in
    the entry variable ``e``; cancelling the dialog clears it.

    :return: None
    """
    if r.get() in (1, 2):
        filename = tkinter.filedialog.askopenfilename()
    else:
        filename = tkinter.filedialog.askdirectory()
    # A cancelled dialog returns an empty value - '' or, on some platforms,
    # an empty tuple - so test truthiness rather than comparing with ''.
    e.set(filename if filename else "")

def guess_filetype(path):
    """Detect the type of a file with ``filetype.guess``.

    :param path: path of the file to inspect
    :return: the detected extension, or None when the type is not recognised
        (an error dialog is shown in that case)
    """
    detected = filetype.guess(path)
    if detected is not None:
        return detected.extension
    # Unknown type: log to the console and tell the user which file failed.
    print('Cannot guess file type!')
    messagebox.showerror('系统提示', path+',未能识别文件类型')
    return None

# Read an image file.
def get_file_content(filePath):
    """Return the complete contents of the file at *filePath* as bytes."""
    with open(filePath, mode='rb') as stream:
        payload = stream.read()
    return payload

def ocr_jpg(filepath):
    """Recognise the text in one image with Baidu OCR and show it in the text widget.

    A banner line containing the file name is inserted first, followed by one
    line per recognised text row. If the response carries no recognition
    result, a failure line is inserted instead.

    :param filepath: path of the image file
    :return: None
    """
    file_name = os.path.basename(filepath)
    text.insert(tkinter.INSERT, '*'*20 + file_name + '*'*20 + '\n')

    result = client.basicGeneral(get_file_content(filepath))
    content = result.get('words_result')
    if content is None:
        # No 'words_result' means the request did not succeed; report it in
        # the output instead of raising KeyError and aborting the whole run.
        text.insert(tkinter.INSERT, '识别失败:' + str(result.get('error_msg', result)) + '\n')
        return

    for co in content:
        text.insert(tkinter.INSERT, co['words'] + '\n')

def pdf_jpg(filepath, zoom=1000):
    """Render every page of a PDF to a PNG file and return the image paths.

    Images are written as ``1.png``, ``2.png``, ... into a folder named
    ``OCR_<YYYYMMDD>`` next to the PDF.

    :param filepath: path of the PDF; a glob pattern is also accepted, in
        which case the first match is used
    :param zoom: scale factor in percent (1000 renders at ten times the
        page's own size)
    :raises FileNotFoundError: if *filepath* matches no file
    :return: list of the generated image paths, in page order
    """
    # Try the literal path first so that names containing glob metacharacters
    # such as "[1]" still resolve; fall back to pattern matching otherwise.
    if os.path.isfile(filepath):
        pdffile = filepath
    else:
        matches = glob.glob(filepath)
        if not matches:
            raise FileNotFoundError(filepath)
        pdffile = matches[0]

    # The output folder is the same for every page: compute and create it once.
    # An empty dirname (bare file name) must map to the current directory,
    # not to the filesystem root.
    flp = os.path.dirname(filepath)
    imagePath = (flp or '.') + '/' + 'OCR_' + time.strftime("%Y%m%d")
    os.makedirs(imagePath, exist_ok=True)

    # NOTE(review): a 10x render of a typical page is several thousand pixels
    # per side - confirm this stays within the OCR service's image size limits.
    rotate = 0
    trans = fitz.Matrix(zoom / 100.0, zoom / 100.0).preRotate(rotate)

    img_path_list = []
    doc = fitz.open(pdffile)
    try:
        for pg in range(doc.pageCount):
            pm = doc[pg].getPixmap(matrix=trans, alpha=0)
            img_path = imagePath + '/' + '%s.png' % (pg + 1)
            pm.writePNG(img_path)
            img_path_list.append(img_path)
    finally:
        # Release the document handle even if rendering fails part-way.
        doc.close()
    return img_path_list



# File types (as reported by guess_filetype) that are sent to OCR as images.
_IMAGE_TYPES = ('jpg', 'png', 'bmp')


def _show_busy():
    """Show the 'converting' notice and repaint it before the blocking work starts."""
    lb2.config(text='正在转换,请稍候...(请勿关闭窗口)')
    # The handler blocks the Tk event loop, so force the label to redraw now.
    lb2.update_idletasks()


def _save_ocr_text(out_dir, starttime):
    """Append the text widget's content to OCR.txt in *out_dir* and report path and elapsed time."""
    os.makedirs(out_dir, exist_ok=True)
    out_file = out_dir + '/' + 'OCR.txt'
    # Explicit UTF-8: the platform default encoding may be unable to encode
    # some of the recognised characters.
    with open(out_file, "a", encoding='utf-8') as f:
        f.write(text.get('1.0', 'end'))
    elapsed = datetime.datetime.now() - starttime
    h_time = str(int(elapsed.total_seconds()))
    lb2.config(text='结果已存入:' + out_file + '  耗时:' + h_time + '秒')


def _convert_single_file(path, mode, starttime):
    """Convert one image (mode 1) or one PDF (mode 2) and save the recognised text."""
    if not Path(path).is_file():
        messagebox.showerror('系统提示', '文件路径不正确,请重新选择!')
        return

    file_type = guess_filetype(path)
    if mode == 1:
        supported = file_type in _IMAGE_TYPES
    else:
        supported = file_type == 'pdf'
    if not supported:
        messagebox.showerror('系统提示', '不支持的文件类型!')
        return

    _show_busy()
    if mode == 1:
        ocr_jpg(path)
    else:
        # Render the PDF page by page, then OCR every page image in order.
        for page_image in pdf_jpg(path):
            ocr_jpg(page_image)

    # Results go to an OCR_<date> folder next to the input file; an empty
    # dirname (bare file name) must mean the current directory, not "/".
    fl_path = os.path.dirname(path)
    mk_path = (fl_path or '.') + '/' + 'OCR_' + time.strftime("%Y%m%d")
    _save_ocr_text(mk_path, starttime)


def _convert_directory(path, starttime):
    """OCR every supported image directly inside *path* and save the text once at the end."""
    if not Path(path).is_dir():
        messagebox.showinfo('系统提示', '文件路径不正确,请重新选择!')
        return

    _show_busy()
    converted = 0
    for name in os.listdir(path):
        candidate = os.path.join(path, name)
        # Only regular files can be type-sniffed; skip sub-directories.
        if not os.path.isfile(candidate):
            continue
        if guess_filetype(candidate) in _IMAGE_TYPES:
            ocr_jpg(candidate)
            converted += 1

    if converted:
        # Write once after the loop: the widget already holds the text of
        # every image, so writing per image would duplicate earlier results.
        _save_ocr_text(path, starttime)
    else:
        lb2.config(text='')


def Transformation():
    """Start-button handler: run OCR for the selected mode and path.

    Clears the text widget, then depending on the radio selection converts a
    single image (1), a PDF (2) or every image in a directory (anything else).
    The recognised text is shown in the widget and appended to an OCR.txt
    file; the status label reports where it was saved and how long it took.

    :return: None
    """
    text.delete(1.0, tkinter.END)  # start from an empty output area

    starttime = datetime.datetime.now()
    path = e.get()
    if path == '':
        messagebox.showinfo('系统提示', '请选择文件或目录!')
        return

    mode = r.get()
    if mode == 1 or mode == 2:
        _convert_single_file(path, mode, starttime)
    else:
        _convert_directory(path, starttime)


def delete_text():
    """Clear-button handler: remove everything from the text widget.

    :return: None
    """
    first, last = 1.0, tkinter.END
    text.delete(first, last)

# Baidu OCR credentials (masked here) and the OCR client used by ocr_jpg.
APP_ID = '***'
API_KEY = '***'
SECRET_KEY = '***'
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)


# Main window: fixed 600x600 size, widgets positioned with relative coordinates.
root = Tk()
root.title('文字识别小工具')
root.geometry("600x600")

Label(root,text='转换类型:').place(relx=0, rely=0.05, relwidth=0.2)

# Radio buttons selecting the conversion mode; `r` holds 1 (image), 2 (PDF) or 3 (batch images).
r = tkinter.IntVar()
radio1 = tkinter.Radiobutton(root, text="图片转文字", value=1, variable=r)
radio1.place(relx=0.15, rely=0.05, relwidth=0.3)
radio2 = tkinter.Radiobutton(root, text="pdf转文字", value=2, variable=r)
radio2.place(relx=0.4, rely=0.05, relwidth=0.3)
radio3 = tkinter.Radiobutton(root, text="图片转文字(批量)", value=3, variable=r)
radio3.place(relx=0.7, rely=0.05, relwidth=0.3)
r.set(1) # select the first mode by default

Label(root,text='文件/目录路径:').place(relx=0, rely=0.15, relwidth=0.25)
# Entry box bound to the variable `e`, which holds the chosen file or directory path.
e = tkinter.Variable()
entry = tkinter.Entry(root, textvariable=e)
entry.place(relx=0.25, rely=0.15, relwidth=0.5)
button1 = Button(root,text='选择文件/目录',command=filepath)
button1.place(relx=0.78, rely=0.13, relwidth=0.25)


# Status label (updated by Transformation) and the button that starts the conversion.
lb2 = Label(root,text='',wraplength = 280,justify = 'left')
lb2.place(relx=0.2, rely=0.25, relwidth=0.7)
button2 = Button(root,text='开始转换',command=Transformation)
button2.place(relx=0, rely=0.25, relwidth=0.2)



# Output area that receives the recognised text.
text = tkinter.Text(root, width=30, height=10)
text.place(relx=0.05, rely=0.35,relheight=0.5,relwidth=0.9)

# "Copy all" and "Clear" buttons; the name button3 is rebound for the second one.
button3 = Button(root,text='复制全部',command=copy)
button3.place(relx=0.15, rely=0.9, relwidth=0.2)
button3 = Button(root,text='清空',command=delete_text)
button3.place(relx=0.6, rely=0.9, relwidth=0.2)

# Enter the Tk event loop; returns when the window is closed.
root.mainloop()

你可能感兴趣的:(python)