以前接了图灵机器人的api做过一个微信小机器人,但是这个机器人只会尬聊也就算了,竟然连斗图都不能!!!虽说图灵机器人官网上有这个api,可是由于我是用python手动接入而不是直接授权,发送表情包给他依然会回复尬聊。我深信没有斗图的机器人是没有灵魂的,于是想动手自己写一个。
参考:用python爬取斗图网
打印从用户那里获得的消息,会发现,接收表情包时返回的是一行文本信息:【收到不支持的消息类型,暂无法显示】
于是我们稍微改一改之前的代码,对这行文本进行判断,只要接收到这行消息,参考爬取代码从斗图网随机爬取一个表情包下载下来,再传入临时素材库,获取mediaid,再生成图片消息发送给用户就可以了。
效果:因为又要下载又要上传显得稍微有点卡顿,但是还可以。
![效果图](https://sika0819.top//images/pasted-48.png)
核心代码:
handler.py
# -*- coding=utf-8 -*-
import hashlib
import reply
import receive
import web
import robot
import getemoticon
from basic import Basic
from media import Media
import json
class Handle(object):
    """web.py handler for the WeChat official-account callback URL.

    GET answers the URL-verification handshake; POST answers user messages.
    """

    def GET(self):
        """Answer the URL-verification request.

        Returns:
            ``echostr`` when the SHA-1 of the sorted token, timestamp and
            nonce equals ``signature``; an empty string when it does not; a
            greeting when the request has no query parameters; the error
            text if anything raises.
        """
        try:
            data = web.input()
            if not data:
                return "hello, this is handle view"
            signature = data.signature
            timestamp = data.timestamp
            nonce = data.nonce
            echostr = data.echostr
            token = "yourtoken"
            # The signature is the SHA-1 of the three values concatenated in
            # sorted order. Hashing the joined string once gives the same
            # digest as feeding the pieces to update() one by one.
            parts = sorted([token, timestamp, nonce])
            hashcode = hashlib.sha1("".join(parts).encode("utf-8")).hexdigest()
            if hashcode == signature:
                return echostr
            return ""
        except Exception as exc:
            # Report the caught instance; ``Exception.message`` on the class
            # is not the message of the error that was raised.
            return str(exc)

    def POST(self):
        """Reply to an incoming user message.

        Text messages are answered by the chat robot, except for the
        placeholder text delivered when the user sent an emoticon, which is
        answered with a random emoticon uploaded as an image. Image messages
        are echoed back. Everything else is acknowledged with "success".

        Returns:
            The reply XML, "success" when there is nothing to reply, or
            "fail" if handling raised.
        """
        import time  # local import: handler.py does not import time at the top

        try:
            webData = web.data()
            recMsg = receive.parse_xml(webData)
            if not isinstance(recMsg, receive.Msg):
                print("none handler yet")
                return "success"

            toUser = recMsg.FromUserName
            fromUser = recMsg.ToUserName

            if recMsg.MsgType == 'text':
                content = recMsg.Content
                print(content)
                # NOTE(review): the literal below uses an ASCII comma; verify
                # it matches the placeholder text actually delivered (it may
                # contain a full-width comma).
                if content == "【收到不支持的消息类型,暂无法显示】":
                    path = getemoticon.getRandomEmoticon()
                    print(path)
                    myMedia = Media()
                    accessToken = Basic().get_access_token()
                    mediaType = "image"
                    callbackjson = myMedia.upload(accessToken, path, mediaType)
                    callback = json.loads(callbackjson)
                    mediaId = callback[u'media_id']
                    createTime = callback[u'created_at']
                    replyMsg = reply.ImageMsg(toUser, fromUser, createTime, mediaType, mediaId)
                    return replyMsg.send()
                rpyMsg = robot.get_response(content, fromUser)
                replyMsg = reply.TextMsg(toUser, fromUser, rpyMsg)
                return replyMsg.send()

            if recMsg.MsgType == 'image':
                mediaId = recMsg.MediaId
                # Use the five-argument constructor form; the three-argument
                # call raised TypeError against reply.ImageMsg.
                replyMsg = reply.ImageMsg(toUser, fromUser, int(time.time()), "image", mediaId)
                return replyMsg.send()

            # Any other message type: acknowledge so no path returns None.
            print("none handler yet")
            return "success"
        except Exception as exc:
            print(exc)
            return "fail"
media.py
# -*- coding: utf-8 -*-
# filename: media.py
from basic import Basic
import urllib2
import poster.encode
from poster.streaminghttp import register_openers
class Media(object):
    """Uploads local media files through the WeChat ``media/upload`` API."""

    def __init__(self):
        # Register poster's openers with urllib2 so multipart bodies built by
        # poster.encode can be sent by urllib2.urlopen.
        register_openers()

    def upload(self, accessToken, filePath, mediaType):
        """Upload a file and return the raw response body.

        Args:
            accessToken: Access token placed in the request URL.
            filePath: Path of the local file to upload.
            mediaType: Value of the ``type`` query parameter, e.g. "image".

        Returns:
            The response body exactly as read from the server.
        """
        postUrl = "https://api.weixin.qq.com/cgi-bin/media/upload?access_token=%s&type=%s" % (accessToken, mediaType)
        # Keep the file open until the request has been sent: the encoded
        # body may read from it lazily. The ``with`` block closes it even
        # when the request fails.
        with open(filePath, "rb") as openFile:
            param = {'media': openFile}
            postData, postHeaders = poster.encode.multipart_encode(param)
            request = urllib2.Request(postUrl, postData, postHeaders)
            urlResp = urllib2.urlopen(request)
            try:
                return urlResp.read()
            finally:
                urlResp.close()
# if __name__ == '__main__':
# myMedia = Media()
# accessToken = Basic().get_access_token()
# filePath = "img/帽冷汗.jpg" #请按实际填写
# mediaType = "image"
# myMedia.upload(accessToken, filePath, mediaType)
reply.py
# -*- coding=utf-8 -*-
import time
class Msg(object):
    """Base reply whose ``send`` yields the plain acknowledgement string."""

    def __init__(self):
        """Nothing to initialise on the base reply."""

    def send(self):
        """Return the literal acknowledgement ``"success"``."""
        return "success"
class TextMsg(Msg):
    """Passive text reply rendered as WeChat reply XML."""

    def __init__(self, toUserName, fromUserName, content):
        """Store the reply fields; CreateTime is the current Unix time.

        Args:
            toUserName: Recipient identifier.
            fromUserName: Sender identifier.
            content: Text of the reply.
        """
        self.__dict = {
            'ToUserName': toUserName,
            'FromUserName': fromUserName,
            'CreateTime': int(time.time()),
            'Content': content,
        }

    def send(self):
        """Return the reply XML with every stored field filled in."""
        # The template had been reduced to a bare {CreateTime}, so the other
        # stored fields were never emitted. Restored to the passive text
        # reply layout.
        # NOTE(review): content is not escaped; a "]]>" inside it would end
        # the CDATA section early.
        XmlForm = """
        <xml>
        <ToUserName><![CDATA[{ToUserName}]]></ToUserName>
        <FromUserName><![CDATA[{FromUserName}]]></FromUserName>
        <CreateTime>{CreateTime}</CreateTime>
        <MsgType><![CDATA[text]]></MsgType>
        <Content><![CDATA[{Content}]]></Content>
        </xml>
        """
        return XmlForm.format(**self.__dict)
class ImageMsg(Msg):
    """Passive image reply rendered as WeChat reply XML."""

    def __init__(self, toUserName, fromUserName, createTime=None, msgType=None, mediaId=None):
        """Store the reply fields.

        Two positional call forms are accepted, because the class used to
        define ``__init__`` twice and the later definition silently replaced
        the earlier one:

        * ``ImageMsg(to, from, mediaId)``
        * ``ImageMsg(to, from, createTime, msgType, mediaId)``

        Args:
            toUserName: Recipient identifier.
            fromUserName: Sender identifier.
            createTime: Creation timestamp; defaults to the current Unix
                time. In the three-argument form this slot carries the
                media id.
            msgType: Message type; defaults to "image".
            mediaId: Identifier of the uploaded media.
        """
        if msgType is None and mediaId is None:
            # Three-argument form: the third positional value is the media id.
            mediaId, createTime = createTime, None
        if createTime is None:
            createTime = int(time.time())
        if msgType is None:
            msgType = "image"
        self.__dict = {
            'ToUserName': toUserName,
            'FromUserName': fromUserName,
            'CreateTime': createTime,
            'MsgType': msgType,
            'MediaId': mediaId,
        }

    def send(self):
        """Return the reply XML with every stored field filled in."""
        # The template had been reduced to a bare {CreateTime}, so the media
        # id was never emitted. Restored to the passive image reply layout.
        XmlForm = """
        <xml>
        <ToUserName><![CDATA[{ToUserName}]]></ToUserName>
        <FromUserName><![CDATA[{FromUserName}]]></FromUserName>
        <CreateTime>{CreateTime}</CreateTime>
        <MsgType><![CDATA[{MsgType}]]></MsgType>
        <Image>
        <MediaId><![CDATA[{MediaId}]]></MediaId>
        </Image>
        </xml>
        """
        return XmlForm.format(**self.__dict)
随机爬取表情包
getemoticon.py
# -*- coding=utf-8 -*-
#导入模块
import random
import requests
import re
from bs4 import BeautifulSoup
import bs4
import os
# User-Agent strings used to make scraping requests look like they come from
# a browser. download() passes this list to getHTMLText(), which picks one
# entry at random for each request.
my_headers = [
"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36",
"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:30.0) Gecko/20100101 Firefox/30.0",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/537.75.14",
"Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Win64; x64; Trident/6.0)",
]
# NOTE(review): kv is not referenced anywhere in the visible code.
kv = {"User-Agent": "Mozilla/5.0"}
def getHTMLText(url, headers):
    """Fetch ``url`` and return the response body as text.

    Args:
        url: Address to request.
        headers: Non-empty sequence of User-Agent strings; one is chosen at
            random for this request.

    Returns:
        The decoded page text, or ``None`` when the request fails or the
        server answers with an error status.

    Raises:
        IndexError: If ``headers`` is empty.
    """
    # Pick a random User-Agent so the request looks like a browser.
    random_header = random.choice(headers)
    try:
        r = requests.get(url, headers={"User-Agent": random_header}, timeout=30)
        # Treat HTTP error statuses as failures too.
        r.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP failures are reported as a failed fetch; other
        # errors are programming mistakes and are no longer swallowed.
        print("爬取失败")
        return None
    r.encoding = r.apparent_encoding
    return r.text
def getImgList(Ilist, html):
    """Append ``[image_url, title]`` pairs found in ``html`` to ``Ilist``.

    Image URLs are taken from ``data-original="..."`` attributes and titles
    from ``alt="..."`` attributes; the n-th URL is paired with the n-th
    title. Titles are stored UTF-8 encoded.

    Args:
        Ilist: List to extend in place.
        html: Page source, or ``None``/empty when the fetch failed.

    Returns:
        The same ``Ilist`` object, for convenience.
    """
    if not html:
        # Nothing fetched, nothing to add.
        return Ilist
    imgList = re.findall(r'data-original="(.+?)"', html)
    titleList = re.findall(r'alt="(.+?)"', html)
    # zip stops at the shorter list, so a page with fewer alt attributes than
    # images no longer raises IndexError.
    # NOTE(review): pairing by position assumes every alt attribute belongs
    # to an emoticon image; verify against the page markup.
    Ilist.extend(
        [img_url, title.encode('utf-8')]
        for img_url, title in zip(imgList, titleList)
    )
    return Ilist
#判断是否存在指定的文件夹,然后创建文件夹
def mkdir():
    """Create the ``img`` directory in the working directory if it is absent."""
    if os.path.exists('img'):
        return
    os.mkdir('img')
def saveImg(Ilistcontent):
    """Download one emoticon into ``img/`` and return its local path.

    A file that already exists under the computed name is reused without
    downloading again.

    Args:
        Ilistcontent: ``[image_url, title]`` pair; the title may be UTF-8
            encoded bytes or already-decoded text.

    Returns:
        Relative path of the saved (or already present) image file.

    Raises:
        ValueError: If the URL does not end in .jpg, .jpeg, .png or .gif.
        requests.RequestException: If the download fails.
    """
    mkdir()
    img_url, title = Ilistcontent[0], Ilistcontent[1]
    if isinstance(title, bytes):
        title = title.decode('utf-8')

    # Take the extension from the URL (ignoring any query string) instead of
    # comparing the last four characters. An unrecognised type used to end
    # in open("") failing.
    ext = os.path.splitext(img_url.split('?', 1)[0])[1].lower()
    if ext not in ('.jpg', '.jpeg', '.png', '.gif'):
        raise ValueError("unsupported image type: %s" % img_url)

    # The title is scraped text: strip path separators and control
    # characters so it cannot escape the img/ directory.
    safe_title = re.sub(r'[\\/\x00-\x1f]', '_', title).strip() or 'emoticon'
    img_path = 'img/%s%s' % (safe_title, ext)

    # Check the cache before downloading.
    if os.path.exists(img_path):
        return img_path

    response = requests.get(img_url, timeout=30)
    # Do not save an HTTP error page as if it were an image.
    response.raise_for_status()
    with open(img_path, 'wb') as f:
        f.write(response.content)
    return img_path
def download(page):
    """Return the ``[image_url, title]`` pairs listed on one result page.

    Args:
        page: Page number substituted into the list URL.

    Returns:
        A list of pairs as built by ``getImgList``; empty when the page
        could not be fetched.
    """
    url = "https://www.doutula.com/photo/list/?page=%d" % page
    html = getHTMLText(url, my_headers)
    if not html:
        # getHTMLText returns None on failure; do not hand that to the parser.
        return []
    return getImgList([], html)
#page = 1
def getRandomEmoticon(page=1):
    """Download a randomly chosen emoticon and return its local path.

    Args:
        page: Result page to pick from; defaults to the first page.

    Returns:
        Path of the saved image as returned by ``saveImg``.

    Raises:
        ValueError: If no emoticons were found on the page.
    """
    print("下载图片")
    print("第%d页" % page)
    Ilist = download(page)
    if not Ilist:
        # Fail with a clear message instead of randint's "empty range" error.
        raise ValueError("no emoticons found on page %d" % page)
    i = random.randrange(len(Ilist))
    print("第%d张图" % i)
    return saveImg(Ilist[i])
这里我因为怕速度太慢只爬取了第一页,然后从第一页随机下载一张图片下来。逻辑应该还可以再优化,从本地直接上传表情包会更快。最好的方式应该是定期下载表情包,把表情存成永久素材,每次直接调用。但由于我是个懒癌晚期,完全不想进行后台管理,于是做成了随用随下载。