wyxw.py中代码
# -*- coding: utf-8 -*-
import scrapy
from ..items import WyxwItem
class WyxwSpider(scrapy.Spider):
    """Spider that scrapes the table rows of the page listed in ``start_urls``.

    One ``WyxwItem`` is yielded for every table row that contains a link in
    its first cell.
    """

    name = 'wyxw'
    allowed_domains = ['news.163.com']
    start_urls = ['http://news.163.com/special/0001386F/rank_whole.html']

    def parse(self, response):
        """Yield one item per table row of the response.

        Args:
            response: The downloaded page for one of ``start_urls``.

        Yields:
            WyxwItem: with ``title`` (link text of the first cell),
            ``xq_url`` (link target of the first cell) and ``dj`` (text of
            the second cell; presumably the click count -- confirm against
            the page). ``xq_url`` / ``dj`` are ``None`` when the row lacks
            that node.
        """
        # Walk the table row by row so the three fields of an item always
        # come from the same <tr>. Indexing three independently extracted
        # lists misaligns (or raises IndexError) as soon as one row is
        # missing a link or a cell.
        for row in response.xpath('//table/tr'):
            title = row.xpath('./td[1]/a/text()').extract_first()
            if title is None:
                # Header rows and rows without a link carry no entry.
                continue

            # Build a fresh item for every row: re-yielding one shared,
            # mutated instance would let later rows overwrite the values of
            # items that are still being processed downstream.
            item = WyxwItem()
            item['title'] = title
            item['xq_url'] = row.xpath('./td[1]/a/@href').extract_first()
            item['dj'] = row.xpath('./td[2]/text()').extract_first()
            yield item
items.py文件代码
class WyxwItem(scrapy.Item):
    """Item with three fields and a helper that builds its INSERT statement."""

    # Declared fields; get_insert_sql() reads all three.
    title = scrapy.Field()
    xq_url = scrapy.Field()
    dj = scrapy.Field()

    def get_insert_sql(self):
        """Return ``(sql, params)`` for inserting this item into ``wyxw_test``.

        The statement uses ``%s`` placeholders and the values are returned
        separately as a tuple in column order (title, xq_url, dj), so the
        caller can pass both to a parameterised execute call.

        Raises:
            KeyError: if any of the three fields has not been set.
        """
        statement = 'insert into wyxw_test(title,xq_url,dj) values (%s,%s,%s)'
        params = tuple(self[column] for column in ('title', 'xq_url', 'dj'))
        return statement, params
pipelines.py代码
class MysqlProjectPipeline(object):
    """Item pipeline that hands each item's INSERT statement to ``MysqlHelper``."""

    def process_item(self, item, spider):
        """Execute the item's INSERT statement and pass the item on.

        Args:
            item: An item exposing ``get_insert_sql()``, which returns a
                ``(sql, params)`` pair.
            spider: The spider that produced the item (unused here).

        Returns:
            The same ``item``, unchanged, so that any later pipeline stage
            receives it.
        """
        insert_sql, data = item.get_insert_sql()

        # NOTE(review): a new MysqlHelper is constructed for every item. If
        # its constructor opens a database connection, consider creating it
        # once (e.g. in open_spider) -- confirm against MysqlHelper.
        myhelper = MysqlHelper()
        myhelper.execute_modify_sql(insert_sql, data)

        # Scrapy expects process_item to return the item (or raise DropItem);
        # falling off the end would hand None to every subsequent pipeline.
        return item
其他文件的配置请参考 Scrapy 框架的基本设置(例如需要在 settings.py 的 ITEM_PIPELINES 中启用 MysqlProjectPipeline)。