代码如下:
import itchat
import requests
from bs4 import BeautifulSoup
# Log in to WeChat before any message handler is registered.
# NOTE(review): per the itchat docs, enableCmdQR renders the login QR code in
# the terminal and hotReload reuses a cached session -- confirm against the
# installed itchat version.
itchat.auto_login(enableCmdQR='-1',hotReload=True)
def send(url):
    """Scrape one listing page and send each post on it to a WeChat friend.

    The recipient is looked up by the hard-coded friend name below; replace
    that placeholder with the real WeChat name of the person to message.

    Args:
        url: Address of the listing page to fetch.

    Returns:
        The ``href`` of the page's "next" link, or ``None`` when the page has
        no such link or when no friend matches the configured name.
    """
    users = itchat.search_friends(name=u'发送对象微信名')
    if not users:
        # Nobody matches the configured name, so there is no one to send to.
        return None
    userName = users[0]['UserName']

    # A timeout keeps a stalled request from blocking the message loop forever.
    start_html = requests.get(url, timeout=10)
    soup = BeautifulSoup(start_html.text, 'lxml')

    for post in soup.find_all(attrs={'class': 'post'}):
        title = post.find(attrs={'class': 'post-title'})
        body = post.find(attrs={'class': 'post-content'})
        if title is None or body is None:
            # Skip entries that lack a title or a body instead of crashing.
            continue
        itchat.send(title.get_text() + body.get_text(), toUserName=userName)

    # Pages without a "next" link have no such element, so guard the lookup
    # rather than calling .get() on None.
    next_link = soup.find(attrs={'class': 'next'})
    return next_link.get('href') if next_link is not None else None
@itchat.msg_register(itchat.content.TEXT)
def print_content(msg):
    """Handle an incoming text message that names a page number.

    When the message text consists only of digits, it is substituted into the
    listing URL and that page is passed to ``send``. Any other text is ignored.

    Args:
        msg: Incoming message mapping; its ``'Text'`` entry is the message text.
    """
    page = str(msg['Text'])
    if not page.isdigit():
        return
    template = 'http://duanziwang.com/category/%E7%BB%8F%E5%85%B8%E6%AE%B5%E5%AD%90/{}/'
    send(str(template.format(page)))
# Start itchat so that the registered text handler receives incoming messages.
itchat.run()
1、微信登录
itchat.auto_login(enableCmdQR='-1',hotReload=True)
2、获取发送的对象
users=itchat.search_friends(name=u'发送对象微信名')
userName=users[0]['UserName']
3、获取要爬取的网页对象
start_html = requests.get(url)
soup = BeautifulSoup(start_html.text, 'lxml')
4、解析网页内容并发送
# Collect every post on the page and read the link to the following page.
list=soup.find_all(attrs={'class': 'post'})
url=soup.find(attrs={'class': 'next'}).get('href')
for i in list:
    # Each message is the post's title text followed by its body text.
    content=str(i.find(attrs={'class': 'post-title'}).get_text())+\
        str(i.find(attrs={'class': 'post-content'}).get_text())
    itchat.send(content, toUserName=userName)
其中,下面这一行
url=soup.find(attrs={'class': 'next'}).get('href')
用于获取下一页的链接。本想实现在微信中输入“下一页”后自动发送下一页的网页内容,但目前还没能实现。
5、获取对方微信发送的内容,根据内容拼接网页链接,然后调用发送函数。
@itchat.msg_register(itchat.content.TEXT)
def print_content(msg):
    """Handle an incoming text message that names a page number.

    The message text is substituted into the listing URL and the resulting
    page is passed to ``send``.

    Args:
        msg: Incoming message mapping; its ``'Text'`` entry is the message text.
    """
    y = str(msg['Text'])
    # Only a page number produces a valid listing URL, so ignore any other
    # text instead of requesting a malformed address.
    if not y.isdigit():
        return
    url = 'http://duanziwang.com/category/%E7%BB%8F%E5%85%B8%E6%AE%B5%E5%AD%90/{}/'
    send(url.format(y))
6、由于这个网站总共只有40多页,在微信中输入的页码必须在这个范围之内;输入非数字内容时,程序不会爬取网页内容。另外,为了方便测试,可以将发送对象改为文件传输助手(filehelper),代码如下:
import itchat
import requests
from bs4 import BeautifulSoup
# Log in to WeChat before any message handler is registered.
# NOTE(review): per the itchat docs, enableCmdQR renders the login QR code in
# the terminal and hotReload reuses a cached session -- confirm against the
# installed itchat version.
itchat.auto_login(enableCmdQR='-1',hotReload=True)
def send(url):
    """Scrape one listing page and send each post on it to File Transfer Helper.

    Messages go to the fixed ``'filehelper'`` user name, so no friend lookup
    is needed; this makes the script easy to test against your own account.

    Args:
        url: Address of the listing page to fetch.

    Returns:
        The ``href`` of the page's "next" link, or ``None`` when the page has
        no such link.
    """
    # A timeout keeps a stalled request from blocking the message loop forever.
    start_html = requests.get(url, timeout=10)
    soup = BeautifulSoup(start_html.text, 'lxml')

    for post in soup.find_all(attrs={'class': 'post'}):
        title = post.find(attrs={'class': 'post-title'})
        body = post.find(attrs={'class': 'post-content'})
        if title is None or body is None:
            # Skip entries that lack a title or a body instead of crashing.
            continue
        itchat.send(title.get_text() + body.get_text(), toUserName='filehelper')

    # Pages without a "next" link have no such element, so guard the lookup
    # rather than calling .get() on None.
    next_link = soup.find(attrs={'class': 'next'})
    return next_link.get('href') if next_link is not None else None
@itchat.msg_register(itchat.content.TEXT)
def print_content(msg):
    """React to a text message by sending the listing page it asks for.

    Only all-digit messages are acted on: the digits are substituted into the
    listing URL, which is then passed to ``send``. Other messages are ignored.

    Args:
        msg: Incoming message mapping; its ``'Text'`` entry is the message text.
    """
    text = str(msg['Text'])
    if not text.isdigit():
        return
    page_url = 'http://duanziwang.com/category/%E7%BB%8F%E5%85%B8%E6%AE%B5%E5%AD%90/{}/'.format(text)
    send(str(page_url))
# Start itchat so that the registered text handler receives incoming messages.
itchat.run()