工作中用 Python 操作 Word 插入图片,并用 OCR 识别文字写入文档,最后打包成 exe 工具的案例记录

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @version  : Python 3.6.6
# @Time     : 2019/6/20 11:22

from aip import AipOcr
from docx import Document
from docx.oxml.ns import qn
from docx.shared import Pt
from tkinter import messagebox

import tkinter as tk
import os

#cache = []
def downtxt(iPath, cache):
    """Collect the first comma-separated field of each line of a ``.txt`` file.

    The first entry returned by ``os.listdir(iPath)`` whose extension is
    ``.txt`` is opened as UTF-8. Every line has its trailing newline removed,
    is split on ``,`` and its first field is appended to *cache*.

    Args:
        iPath: Directory that is searched for a ``.txt`` file.
        cache: List that is extended in place, one entry per line.

    Returns:
        None. Results are delivered through *cache*.

    If the directory cannot be listed, contains no ``.txt`` file, or the file
    cannot be opened/decoded, a message is printed and *cache* keeps whatever
    was appended up to that point. Any other exception propagates to the
    caller instead of being reported as a missing file.
    """
    try:
        # [0] raises IndexError when the directory holds no .txt file.
        txt_name = [name for name in os.listdir(iPath)
                    if os.path.splitext(name)[1] == '.txt'][0]
        with open(os.path.join(iPath, txt_name), 'r', encoding='utf-8') as f:
            for line in f:
                cache.append(line.strip('\n').split(',')[0])
    except (IndexError, OSError, ValueError):
        # OSError: missing/unreadable directory or file.
        # ValueError: undecodable bytes (UnicodeDecodeError) or an invalid path.
        print('找不到txt文件')

def get_file_content(filePath):
    """Return the complete contents of *filePath* as ``bytes``."""
    with open(filePath, mode='rb') as binary_file:
        payload = binary_file.read()
    return payload

#ocrlist = []
def ocr_baidu(filePath, ocrlist):
    """Run Baidu general OCR on one image and record the recognised text.

    The image bytes are sent through ``AipOcr.basicGeneral``. The ``words``
    value of every entry in the response's ``words_result`` is joined with
    three spaces, and ``'<text>#<filePath>'`` is appended to *ocrlist*.

    Args:
        filePath: Path of the image file to recognise.
        ocrlist: List that is extended in place with one entry for this image.

    Returns:
        None. The image path is printed once the entry has been appended.

    If the response carries no ``words_result`` key, the raw response is
    printed and the entry is stored with empty text, so one failed image does
    not abort the rest of the batch.
    """
    # Placeholders: fill in the credentials of your own Baidu OCR application.
    APP_ID = '填你的AppID'
    API_KEY = '填你的API Key'
    SECRET_KEY = '填你的Secret Key'
    aipOcr = AipOcr(APP_ID, API_KEY, SECRET_KEY)
    options = {'detect_direction': 'true', 'language_type': 'CHN_ENG', }

    result = aipOcr.basicGeneral(get_file_content(filePath), options)
    words_result = result.get('words_result')
    if words_result is None:
        # NOTE(review): presumably an error payload (error_code / error_msg)
        # from the service -- confirm against the SDK documentation.
        print(result)
        words_result = []
    OCRresult = '   '.join(entry['words'] for entry in words_result)
    ocrlist.append(OCRresult + '#' + filePath)
    print(filePath)

def writedocx(savePath, ocrlist):
    """Write one worksheet section per OCR entry into a Word document.

    Each entry of *ocrlist* is expected to look like ``'<text>#<image path>'``.
    For every entry the document receives a bold "题目:" heading, the picture,
    the recognised text, five further bold headings and a blank paragraph.
    The 'Normal' style is then set to the 宋体 font with zero paragraph
    spacing, the document is saved as ``savePath + ".docx"`` and a completion
    message box is shown.

    Args:
        savePath: Target path without the ``.docx`` extension.
        ocrlist: Iterable of ``'<text>#<image path>'`` entries.

    Returns:
        None.
    """
    # When packaging as an exe, use
    #   Docx = Document(docx=os.path.join(os.getcwd(), 'default.docx'))
    # and ship default.docx in the same directory as the exe.
    Docx = Document()
    for item in ocrlist:
        # Split on the LAST '#': the recognised text may itself contain '#',
        # in which case a plain split() would hand a piece of the text to
        # add_picture() as the image path. An entry without any '#' is
        # treated as a bare image path with empty text.
        text, _, picture_path = str(item).rpartition('#')

        Docx.add_paragraph().add_run("题目:").bold = True
        Docx.add_picture(picture_path)
        Docx.add_paragraph().add_run(text)
        for heading in ("题型:", "思路启发:", "解答过程:", "答案:", "归纳总结:"):
            Docx.add_paragraph().add_run(heading).bold = True
        Docx.add_paragraph()

    normal_style = Docx.styles['Normal']
    normal_style.font.name = u'宋体'
    # The East Asian font attribute is set explicitly on the style's rFonts
    # element in addition to font.name.
    normal_style._element.rPr.rFonts.set(qn('w:eastAsia'), u'宋体')

    normal_style.paragraph_format.space_after = Pt(0)  # spacing after each paragraph
    normal_style.paragraph_format.space_before = Pt(0)  # spacing before each paragraph

    Docx.save(savePath + ".docx")
    messagebox.showinfo("提示", "完成!")

def FunctionAi():
    """Button callback: OCR every failed-search image and build the document.

    Reads the working folder from the module-level entry widget ``t``, loads
    the result lines of the folder's ``.txt`` file with ``downtxt``, and for
    every line containing '搜索失败' takes the second tab-separated field as an
    image file name inside that folder and passes it to ``ocr_baidu``. The
    collected entries are written by ``writedocx`` to
    ``<folder>/<folder name>.docx``.

    Nothing is written when ``downtxt`` yields no lines.
    """
    cache = []
    ocrlist = []
    # Pasted paths often carry stray surrounding whitespace.
    iPath = t.get().strip()
    # normpath + basename copes with both '/' and '\\' separators and with a
    # trailing separator, which splitting on '\\' alone did not.
    folder_name = os.path.basename(os.path.normpath(iPath))
    savePath = os.path.join(iPath, folder_name)
    downtxt(iPath, cache)

    if not cache:
        return

    for i in cache:
        if '搜索失败' not in i:
            continue
        print(i)
        fields = str(i).split('\t')
        if len(fields) < 2:
            # No image name on this line; skip it rather than abort the batch.
            continue
        filePath = os.path.join(iPath, fields[1])
        ocr_baidu(filePath, ocrlist)

    writedocx(savePath, ocrlist)

if __name__ == '__main__':
    # Build the GUI: one window holding a run button and a text entry,
    # then hand control to the Tk event loop.
    window = tk.Tk()
    window.title('测试')
    window.geometry('500x90')
    # Clicking the button invokes FunctionAi.
    b2 = tk.Button(window, text='运 行', width=15, height=2, command=FunctionAi)
    b2.pack()

    # FunctionAi reads the folder path from this entry through the
    # module-level name `t` (via t.get()), so the name must stay as is.
    t = tk.Entry(window, show=None, width='110')
    t.pack()
    window.mainloop()

#print('写入word完成!')

你可能感兴趣的:(python,python,ocr,word)