python 微信机器人自动聊天+回怼表情包+Adidas公众号自动抽签 四、获取表情包中的文字并进行同类型回怼

前言

在前文“自动回怼表情包”中，只是做到了爬取表情包并存放在本地，然后当微信好友发送表情包给你的时候，随机抓取本地的一个表情包进行回复，这样太不灵活。

识别表情包中的文字

from __future__ import unicode_literals
from wxpy import *
import requests
import json
import re
import urllib.request  #urllib2
import threading
import glob
import random
import urllib
import base64
import os
from apscheduler.schedulers.blocking import BlockingScheduler
# Create the wxpy bot instance that the message handler below registers on.
# NOTE(review): cache_path=True presumably caches the login session so the
# QR code does not have to be scanned on every start - confirm in the wxpy docs.
bot = Bot(cache_path=True)


@bot.register(except_self=False)
def print_others(msg):
    """Auto-reply handler registered on the wxpy bot.

    Text messages are sent to ``get_response`` and the chatbot's answer is
    returned as the reply. Picture messages are saved under ``./getImages/``,
    run through ``getMessageByImage`` and the result is handed to
    ``searchImg``. Every other message type is ignored.

    Args:
        msg: the incoming message; this handler reads ``text``, ``type`` and
            ``file_name`` and calls ``get_file``.

    Returns:
        The chatbot answer for text messages (``''`` when the chatbot reports
        that the daily request quota is used up), the return value of
        ``searchImg`` for pictures, and ``None`` for anything else.
    """
    print(msg)
    msg_type = msg.type
    if msg_type == 'Text':
        # Query the chatbot exactly once: asking a second time doubles the
        # quota usage and may yield a different answer than the one checked.
        answer = get_response(msg.text)
        if answer == '亲爱的,当天请求次数已用完。':
            # Quota exhausted: return an empty string instead of the notice.
            return ''
        return answer
    if msg_type == 'Picture':
        # Save the received sticker locally, then recognise the text in it.
        path = os.path.join('./getImages/' + msg.file_name)
        msg.get_file(path)
        mes = getMessageByImage(msg.file_name)
        return searchImg(mes)
    return None


# 识别图片文字 百度云技术接口https://cloud.baidu.com/doc/OCR/s/zjwvxzrw8/
def getMessageByImage(imageName):
    """Recognise the text in a saved picture and return the chatbot's answer to it.

    The picture is read from ``<current_path>/getImages/<imageName>``,
    base64-encoded and posted to the Baidu general OCR endpoint. The
    recognised ``words`` entries are concatenated and passed to
    ``get_response``.

    Args:
        imageName: file name of the picture inside the ``getImages`` folder.

    Returns:
        Whatever ``get_response`` returns for the recognised text.

    Raises:
        KeyError: if the token or OCR response lacks the expected fields.
        OSError: if the picture cannot be read or the OCR request fails.
    """
    # Exchange the Baidu account's client id / secret for an access token
    # (see http://ai.baidu.com/docs#/Auth/top); the credentials are masked here.
    tokenUrl = 'https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=**************&client_secret=************'
    token = requests.get(tokenUrl).json()['access_token']
    url = 'https://aip.baidubce.com/rest/2.0/ocr/v1/general?access_token=' + token

    with open(current_path + '/getImages/' + imageName, 'rb') as f:
        imageEncode = str(base64.b64encode(f.read()), 'utf-8')

    headers = {
        'Content-Type': 'application/x-www-form-urlencoded'
    }
    postdata = urllib.parse.urlencode({"image": imageEncode}).encode('utf-8')
    request = urllib.request.Request(url=url, data=postdata, headers=headers)
    # Use the response as a context manager so the connection is closed.
    with urllib.request.urlopen(request) as res:
        info = json.loads(res.read().decode('utf-8'))

    # Join every recognised text fragment into one string.
    text = ''.join(item['words'] for item in info['words_result'])
    # The original passed an undefined name here (NameError); the recognised
    # text is what has to be sent to the chatbot.
    return get_response(text)


def get_response(msg):
    """Ask the Tuling chatbot API for a reply to ``msg``.

    The request body is the JSON-serialised key / info / userid payload and
    the ``text`` field of the JSON answer is returned.
    """
    request_body = json.dumps({
        'key': 'ce697b3fc8b54d5f88c2fa59772cb2cf',  # API key
        'info': msg,  # the message that was received
        'userid': 'wechat-robot',  # free-form user id
    })
    # POST the payload to the Tuling endpoint and decode the JSON answer.
    answer = requests.post('http://www.tuling123.com/openapi/api', data=request_body)
    return json.loads(answer.text)['text']


# 获取表情包
def Downloader(step):
    """Download the sticker images of 50 consecutive doutula.com list pages.

    Pages ``step * 50`` up to ``step * 50 + 49`` are fetched. Every image
    found on a page is stored as ``./battleImages/<alt text>.<extension>``,
    with characters that are awkward in file names removed from the alt text.

    Args:
        step: zero-based index of the 50-page slice to crawl.

    Returns:
        None. Files are written to ``./battleImages``.
    """
    baseurl = 'http://www.doutula.com/photo/list/?page='
    # Browser-like headers for the page requests.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
        'Accept': 'text/html;q=0.9,*/*;q=0.8',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
        'Accept-Encoding': 'gzip',
        'Connection': 'close',
        'Referer': None  # if the crawl is still blocked, set this to the site's host
        }
    # Compile both patterns once instead of once per page / per image.
    link_pattern = re.compile(r'data-original="(.*?)".*?alt="(.*?)"', re.S)
    # A character class replaces the original alternation, which contained a
    # bare "?" (re.error: nothing to repeat) and empty "(|)" groups.
    unsafe_chars = re.compile(r'[/\\《》。？！.?!*&#()（）]')
    # urlretrieve does not create missing directories.
    os.makedirs('./battleImages', exist_ok=True)

    first_page = step * 50
    for page in range(first_page, first_page + 50):
        page_url = baseurl + str(page)
        print(page_url)

        html = requests.get(page_url, headers=headers).text
        # Each match is an (image url, alt text) pair.
        for img_url, alt_text in link_pattern.findall(html):
            imgname = unsafe_chars.sub('', alt_text)
            imgtype = img_url.split('.')[-1]
            path = ('battleImages/%s.%s' % (imgname, imgtype))
            print(path, img_url)
            target = os.path.join('./', path)
            try:
                urllib.request.urlretrieve(img_url, target)
            except OSError as err:
                # URLError/HTTPError are OSError subclasses; one broken image
                # should not abort the rest of the slice.
                print('download failed: %s (%s)' % (img_url, err))


# Crawler threads to wait for; stays empty while the append below is commented out.
t_obj = []
# Crawl the stickers with 10 threads, each thread getting one step value for Downloader.
# NOTE: start() and the append are commented out, so the threads are only
# constructed and never run. Presumably the crawl was done once and then
# disabled - re-enable both lines to download again.
for i in range(10):
    t = threading.Thread(target=Downloader, args=(i,))
    # t.start()
    # t_obj.append(t)

# Wait for every collected crawler thread (a no-op while t_obj is empty).
for t in t_obj:
    t.join()


# Working directory at import time; the image folders are resolved against it.
current_path = os.getcwd()
# Paths found by searchImg; results accumulate across calls.
imgs = []


def searchImg(keywords):
    """Collect the saved stickers whose file name ends with ``keywords``.

    Looks in ``<current_path>/battleImages`` for files named
    ``*<keywords>.<any extension>`` and appends every match to the
    module-level ``imgs`` list.

    Args:
        keywords: text the file name (without extension) has to end with.
            It is matched literally.

    Returns:
        None. Matches are only appended to ``imgs``.
    """
    # NOTE(review): nothing is returned, so a caller that returns this
    # function's result gets None - confirm how a match is meant to be sent.
    print('keywords: %s' % keywords)
    # Escape glob metacharacters: recognised text such as "a[1]" or "why?"
    # must be matched literally, not interpreted as a wildcard.
    pattern = current_path + '/battleImages/*' + glob.escape(keywords) + '.*'
    imgs.extend(glob.glob(pattern))

你可能感兴趣的:(python,wxpy,识别文字,python)