文章来自:在 crontab 中执行 scrapy(解决不执行、不爬取数据的问题)– 自我的进化
在 crontab 中执行 scrapy 会遇到命令不执行,或者执行了但是没有爬取数据的问题,这里做一下总结。
# 顺序执行所有爬虫
import subprocess
from datetime import datetime
import time
def crawl_work(crawl_name_list=None, scrapy_bin='/usr/local/bin/scrapy'):
    """Run scrapy spiders sequentially and print per-spider timing records.

    Args:
        crawl_name_list: spider names to run in order. Defaults to the
            original hard-coded list when None (backward compatible).
        scrapy_bin: absolute path to the scrapy executable — crontab runs
            with a minimal PATH, so an absolute path is required.

    Returns:
        None. Timing information is written to stdout (captured by cron).
    """
    if crawl_name_list is None:
        crawl_name_list = ['spider_1', 'spider_2']
    date_start = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
    record_times = {}
    for crawl_name in crawl_name_list:
        start_time = time.time()
        record_times[crawl_name] = {}
        record_times[crawl_name]['start_date'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
        # Argument list with shell=False avoids shell-injection / quoting
        # problems of the original string + shell=True form; run() blocks
        # until the spider finishes, so spiders execute strictly in order.
        subprocess.run([scrapy_bin, 'crawl', crawl_name])
        end_time = time.time()
        record_times[crawl_name]['end_date'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
        record_times[crawl_name]['time_last'] = int((end_time - start_time) / 60)  # minutes, floored
    print('time_record-date_start: ', date_start)
    for crawl_name, record_time in record_times.items():
        print('time_record-' + crawl_name + ': ', record_time)
    date_end = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
    print('time_record-date_end: ', date_end)
# Script entry point: intended to be invoked directly from a crontab line;
# runs all spiders once per invocation.
if __name__ == '__main__':
    crawl_work()