# -*- coding: utf-8 -*-
import datetime, time, os
import pymssql, yagmail
# Initialise the mail module: build the SMTP client used to send the
# end-of-crawl notification.
mailers = yagmail.SMTP(
    user="[email protected]",
    password="xxxx",
    host="smtp.xx.com",
)

# Announce on stdout when the scheduled crawl was launched.
print(
    '当前时间:{} ,定时爬虫开始运行。。。'.format(
        datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    )
)

# Start instants, captured right after the banner is printed.
# `st` is read later by the pipeline's close_spider to report the elapsed time;
# `start` (epoch seconds) has no reader in the visible part of this file.
st = datetime.datetime.now()
start = time.time()
class MySQLPipline(object):
    """Item pipeline that stores every scraped item through a pymssql connection.

    When the spider closes, the connection is released and a summary mail is
    sent with the module-level ``mailers`` client. The elapsed time reported
    there is measured from the module-level ``st`` timestamp.
    """

    def open_spider(self, spider):
        """Open the database connection and a cursor when the spider starts.

        Args:
            spider: The spider being opened (not used here).
        """
        self.db = pymssql.connect(host="xxxx", user="", password="", database="xxx", charset='utf8')
        self.cursor = self.db.cursor()

    def process_item(self, item, spider):
        """Insert one item as a row and hand the item on to the next stage.

        The item's field names are used as column names and ``item.table`` as
        the target table, so the item definition has to mirror the table.
        A failed insert is printed and rolled back; it never stops the crawl.

        Args:
            item: Mapping-like item exposing a ``table`` attribute.
            spider: The spider that produced the item (not used here).

        Returns:
            The same ``item``, unchanged.
        """
        data = dict(item)
        keys = ', '.join(data.keys())
        values = ', '.join(['%s'] * len(data))
        try:
            # Built inside the try so that an item without a `table` attribute
            # is reported and skipped like any other failed insert.
            # NOTE: only the values are bound as parameters; the table and
            # column names are formatted into the statement and therefore must
            # come from trusted item definitions, never from scraped content.
            sql = "INSERT INTO {0} ({1}) VALUES ({2})".format(item.table, keys, values)
            self.cursor.execute(sql, tuple(data.values()))
            self.db.commit()  # make the insert permanent
        except Exception as ex:
            print("错误在这>>>>>", ex, "<<<<<错误在这")
            print(data.values())
            print("*" * 60)
            self.db.rollback()
        return item  # a pipeline stage must return the item

    def close_spider(self, spider, reason=None):
        """Release the database resources and mail a summary of the run.

        Args:
            spider: The spider being closed; its name and crawler stats are
                included in the mail.
            reason: Why the spider stopped. Optional so the method can also be
                called with the spider alone; it is then reported as ``None``.
        """
        try:
            self.cursor.close()
        finally:
            # Close the connection even if closing the cursor failed.
            self.db.close()
        print('爬虫执行结束时间为:{}'.format(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
        et = datetime.datetime.now()
        print('爬虫 [info] 耗时: %s' % (et - st))
        # Stats gathered during the crawl, read through the public accessor.
        stats_info = spider.crawler.stats.get_stats()
        subject = " [%s] 爬虫关闭提醒" % spider.name
        contents = "爬虫 [%s] 已经关闭,程序执行结束! \r\n 爬虫 [info] 耗时:%s ,\n原因是: %s, \n以下为运行信息:\n %s" % (spider.name, et - st, reason, stats_info)
        mailers.send(to="[email protected]", subject=subject, contents=contents)
        print("*" * 100)