# Scrape the Baidu hot-search board and render the entries as an HTML
# fragment (``data_email``) that the mail-sending section further down uses
# as the message body.
#
# NOTE(review): an identical copy of this scrape-and-format section appears
# again later in this file; confirm whether both copies are intended.
from datetime import datetime
from html import escape

import requests
from lxml import etree

url = 'http://top.baidu.com/buzz?b=1&fr=topindex'
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
    'Referer': 'http://top.baidu.com/',
}

# A timeout keeps the script from hanging forever on a stalled connection.
r = requests.get(url, headers=header, timeout=10)
# Force GBK decoding of the response body before reading ``r.text``.
r.encoding = 'gbk'
selector = etree.HTML(r.text)

# First link inside every <td class="keyword"> cell.
eles = selector.xpath('//td[@class="keyword"]/a[1]')

# One dict per hot-search entry: title, link and the time it was crawled.
ls = []
for ele in eles:
    titles = ele.xpath('./text()')
    links = ele.xpath('./@href')
    # Skip anchors that lack text or an href instead of raising IndexError.
    if not titles or not links:
        continue
    ls.append({
        'title': titles[0],
        'url': links[0],
        'crawled_time': str(datetime.now()),
    })

# Build the HTML body: one numbered, linked line per entry.  The format
# string needs four slots (rank, link, title, time); the link and title are
# escaped because they are scraped text placed inside HTML markup.
rows = []
for index, email_ls in enumerate(ls, start=1):
    rows.append(
        '%d:热搜标题title:<a href="%s">%s</a> 爬取时间:%s<br>' % (
            index,
            escape(email_ls['url'], quote=True),
            escape(email_ls['title']),
            email_ls['crawled_time'],
        )
    )
data_email = ''.join(rows)

print('邮件发送的总字符数目:', len(data_email))
# Send the HTML fragment held in ``data_email`` through QQ Mail's
# SMTP-over-SSL endpoint.
import smtplib
from email.header import Header
from email.mime.text import MIMEText
from email.utils import formataddr

# Local module holding the sender account, the recipient account and the
# SMTP authorisation code.
from data import from_addr, to_addr, qqCode

From_addr = from_addr  # sender account (also used as the SMTP login name)
To_addr = to_addr      # recipient account
# Authorisation code used as the SMTP password; per the original note, QQ
# requires SMTP access to be enabled on the account to obtain it.
QQCode = qqCode

# QQ Mail SMTP host; for 126.com the original note suggests smtp.126.com.
smtp_server = 'smtp.qq.com'
# SSL port used for QQ Mail; the original note gives 25 for 126.com.
smtp_port = 465

# Assemble the message: body, MIME subtype and charset.
message = MIMEText(data_email, 'html', 'utf-8')
# The From header carries a display name *and* the sender address; a bare
# display name is not a valid mailbox.  Assumes From_addr is a plain
# address string -- it is also passed to login() below.
message['FROM'] = formataddr(('PYTHON邮件', From_addr))
message['TO'] = Header('管理员', 'utf-8')
Subject = '百度热搜风云榜'
message['subject'] = Header(Subject, 'utf-8')

try:
    # The context manager sends QUIT and closes the socket even when login
    # or delivery fails; connecting and logging in inside the ``try`` means
    # those failures are reported through the same message as send errors.
    with smtplib.SMTP_SSL(smtp_server, smtp_port) as stmp:
        stmp.login(From_addr, QQCode)
        stmp.sendmail(From_addr, To_addr, message.as_string())
    print('邮件发送成功')
except Exception as e:
    print('邮件发送失败!' + str(e))
import requests
from lxml import etree
import cssselect
from datetime import datetime
# Scrape the Baidu hot-search board and render the entries as an HTML
# fragment (``data_email``) that the mail-sending section below uses as the
# message body.
#
# NOTE(review): an identical copy of this scrape-and-format section appears
# earlier in this file; confirm whether both copies are intended.
from html import escape

url = 'http://top.baidu.com/buzz?b=1&fr=topindex'
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
    'Referer': 'http://top.baidu.com/',
}

# A timeout keeps the script from hanging forever on a stalled connection.
r = requests.get(url, headers=header, timeout=10)
# Force GBK decoding of the response body before reading ``r.text``.
r.encoding = 'gbk'
selector = etree.HTML(r.text)

# First link inside every <td class="keyword"> cell.
eles = selector.xpath('//td[@class="keyword"]/a[1]')

# One dict per hot-search entry: title, link and the time it was crawled.
ls = []
for ele in eles:
    titles = ele.xpath('./text()')
    links = ele.xpath('./@href')
    # Skip anchors that lack text or an href instead of raising IndexError.
    if not titles or not links:
        continue
    ls.append({
        'title': titles[0],
        'url': links[0],
        'crawled_time': str(datetime.now()),
    })

# Build the HTML body: one numbered, linked line per entry.  The format
# string needs four slots (rank, link, title, time); the link and title are
# escaped because they are scraped text placed inside HTML markup.
rows = []
for index, email_ls in enumerate(ls, start=1):
    rows.append(
        '%d:热搜标题title:<a href="%s">%s</a> 爬取时间:%s<br>' % (
            index,
            escape(email_ls['url'], quote=True),
            escape(email_ls['title']),
            email_ls['crawled_time'],
        )
    )
data_email = ''.join(rows)

print('邮件发送的总字符数目:', len(data_email))
# Send the scraped list by e-mail; here the target is a QQ mailbox.
# The mail could be sent as plain text, but that is not tidy; the HTML form
# is cleaner, so the body (``data_email``) is sent as HTML.
import smtplib
from email.header import Header
from email.mime.text import MIMEText
from email.utils import formataddr

# Local module holding the sender account, the recipient account and the
# SMTP authorisation code.
from data import from_addr, to_addr, qqCode

From_addr = from_addr  # sender account (also used as the SMTP login name)
To_addr = to_addr      # recipient account
# Authorisation code used as the SMTP password; per the original note, QQ
# requires SMTP access to be enabled on the account to obtain it.
QQCode = qqCode

# QQ Mail SMTP host; for 126.com the original note suggests smtp.126.com.
smtp_server = 'smtp.qq.com'
# SSL port used for QQ Mail; the original note gives 25 for 126.com.
smtp_port = 465

# Assemble the message: body, MIME subtype and charset.
message = MIMEText(data_email, 'html', 'utf-8')
# The From header carries a display name *and* the sender address; a bare
# display name is not a valid mailbox.  Assumes From_addr is a plain
# address string -- it is also passed to login() below.
message['FROM'] = formataddr(('PYTHON邮件', From_addr))
message['TO'] = Header('管理员', 'utf-8')
Subject = '百度热搜风云榜'
message['subject'] = Header(Subject, 'utf-8')

try:
    # The context manager sends QUIT and closes the socket even when login
    # or delivery fails; connecting and logging in inside the ``try`` means
    # those failures are reported through the same message as send errors.
    with smtplib.SMTP_SSL(smtp_server, smtp_port) as stmp:
        stmp.login(From_addr, QQCode)
        stmp.sendmail(From_addr, To_addr, message.as_string())
    print('邮件发送成功')
except Exception as e:
    print('邮件发送失败!' + str(e))