上传图片获取URL及OCR识别结果

因为太懒，不想每次都手动重复操作，所以写了这个脚本，想一劳永逸地解决。

import requests
import json, glob, os
import openpyxl
from requests_toolbelt import MultipartEncoder
from PIL import Image
import pytesseract as pt
import cv2
import numpy as np
import matplotlib.pyplot as plt



class aisakura(object):
    """Client that uploads images to a backend and records its recognition results.

    The attributes set in ``__init__`` are used by the request methods:
    ``token`` is sent as the ``Authorization`` header, ``content_type`` as
    the ``Content-Type`` header of JSON requests, and ``uri`` is the base
    URL that every endpoint path is appended to.
    """

    def __init__(self):
        self.token = 'eg'
        self.content_type = 'application/json'
        self.uri = 'http://.com'

    def upload_coordinate(self):
        """Upload every ``./pic/*.png`` file and collect the replies.

        Each file is posted as multipart form data to
        ``/backend/unauth/upload/homework`` together with fixed
        ``lessonId`` and ``num`` fields.

        Returns:
            list: ``response.json()['data']`` for each uploaded file, in
            the order ``glob`` returned the paths (presumably the hosted
            image URLs -- confirm against the backend). Empty when no
            file matches.
        """
        urls = []
        for path in glob.glob(r'./pic/*.png'):
            print(path)
            # The encoder streams from the handle while the request is sent,
            # so the file must stay open until ``post`` returns; the ``with``
            # block then guarantees it is closed even if the request raises.
            with open(path, 'rb') as fh:
                m = MultipartEncoder(
                    fields={
                        'lessonId': "6704",
                        'num': "10",
                        'file': (path, fh, 'text/plain'),
                    }
                )
                r = requests.post(self.uri + '/backend/unauth/upload/homework',
                                  data=m,
                                  headers={'Content-Type': m.content_type})
            # Decode the body once and reuse it for logging and the result.
            payload = r.json()
            print(payload)
            urls.append(payload['data'])
        return urls

    def recognize_title(self, urls):
        """Run the AI job for each URL and append the result to ``sakurai.xlsx``.

        For every URL two requests are posted to
        ``/backend/unauth/job/AiJob`` (``sw`` = 0 and ``sw`` = 3). A new
        row is then appended to sheet ``page10`` with the URL in column 1
        and the ``data`` of the ``sw`` = 0 reply in column 2.

        Args:
            urls: iterable of image URLs, e.g. the list returned by
                ``upload_coordinate``.
        """
        wb = openpyxl.load_workbook('sakurai.xlsx')
        ws = wb['page10']
        for url in urls:
            print('okkk')
            datalist = []
            for sw in (0, 3):
                params = {
                    "url": url,
                    "classId": 2247,
                    "lessonId": 6704,
                    "sw": sw,
                    "num": 10
                }
                r = requests.post(self.uri + '/backend/unauth/job/AiJob',
                                  data=json.dumps(params),
                                  headers={
                                      'Content-Type': self.content_type,
                                      'Authorization': self.token
                                  })
                datalist.append(r.json()['data'])
            next_row = ws.max_row + 1
            ws.cell(row=next_row, column=1, value=str(url))
            # NOTE(review): only the sw=0 result is written; the sw=3 result
            # (datalist[1]) is fetched but currently not stored.
            ws.cell(row=next_row, column=2, value=str(datalist[0]))
            # Saving after every row keeps the rows already written on disk
            # if a later request fails.
            wb.save("sakurai.xlsx")

    def pillow(self, image_path=r'./pic/abcd.jpeg', thresh=150, maxval=10):
        """Binarize an image, display it, and print the OCR text.

        The image is loaded as grayscale, thresholded with
        ``cv2.THRESH_BINARY``, shown with matplotlib, and then passed to
        pytesseract with the ``chi_sim`` language.

        Args:
            image_path: path of the image to process.
            thresh: threshold passed to ``cv2.threshold``.
            maxval: value ``cv2.threshold`` assigns to pixels above
                ``thresh``.

        Raises:
            OSError: if OpenCV cannot read ``image_path``.
        """
        # Load as single-channel grayscale.
        img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # ``cv2.imread`` signals failure by returning None rather than
            # raising; fail here with a message that names the file.
            raise OSError('could not read image: %s' % image_path)
        # Black/white binarization.
        # NOTE(review): 255 is the customary maxval for 8-bit images; the
        # default of 10 is kept from the original code -- confirm intended.
        _, binary = cv2.threshold(img, thresh, maxval, cv2.THRESH_BINARY)
        plt.imshow(binary, cmap='gray', interpolation='bicubic')
        # Hide the axis ticks so only the image is shown.
        plt.xticks([])
        plt.yticks([])
        plt.show()
        text = pt.image_to_string(binary, lang="chi_sim")
        print(text)


if __name__ == "__main__":
    # Bind the instance to its own name: reusing ``aisakura`` would rebind
    # the class name to the instance and make the class unreachable.
    client = aisakura()
    # Upload + recognition pipeline, currently disabled:
    # urls = client.upload_coordinate()
    # client.recognize_title(urls)
    # Run the local binarize-and-OCR step; it prints its result.
    client.pillow()



你可能感兴趣的:(上传图片获取URL及OCR识别结果)