Scrapy--淘宝--[get]--[加入数据库]

taobao.py

import scrapy,re
from Day10zuoye.items import TaobaoItem

class TaobaoSpider(scrapy.Spider):
    """Spider for Taobao search-result pages.

    Taobao embeds the result data as inline JSON in the page source, so the
    fields are pulled out with regular expressions rather than CSS/XPath
    selectors.  Each matched result is yielded as a ``TaobaoItem``.
    """

    # NOTE: original snippet used smart quotes (‘taobao’), which is a
    # Python syntax error — replaced with ASCII quotes.
    name = 'taobao'
    allowed_domains = ['taobao.com']
    start_urls = ['https://s.taobao.com/search?spm=a21c9.8840246.navitem.4.641ed0adCuMjGU&q=%E5%B0%A4%E5%85%8B%E9%87%8C%E9%87%8C']

    def parse(self, response):
        """Extract title/price/sales/image fields from the inline JSON.

        :param response: the search-result page response
        :yields: one populated ``TaobaoItem`` per product found
        """
        html_str = response.text
        # Field patterns target the keys of the embedded JSON blob.
        titles = re.findall(r'"raw_title":"(.*?)"', html_str)        # product titles
        prices = re.findall(r'"view_price":"(.*?)"', html_str)       # display prices
        sales_nums = re.findall(r'"view_sales":"(.*?)"', html_str)   # sales-count text
        image_urls = re.findall(r'"pic_url":"(.*?)"', html_str)      # thumbnail URLs
        # zip() pairs the parallel lists and safely stops at the shortest,
        # unlike the original range(len(titles)) indexing which raised
        # IndexError whenever one pattern matched fewer times.
        for title, price, sales_num, image_url in zip(titles, prices, sales_nums, image_urls):
            item = TaobaoItem()
            item['title'] = title
            item['price'] = price
            item['sales_num'] = sales_num
            item['image_url'] = image_url
            yield item

items.py

在原来脚本新写入了以下这个类

class TaobaoItem(scrapy.Item):
    """One Taobao search result, plus the SQL needed to persist it."""

    # Fields mirror the keys scraped from the page's inline JSON.
    title = scrapy.Field()      # product title ("raw_title")
    price = scrapy.Field()      # display price ("view_price")
    sales_num = scrapy.Field()  # sales-count text ("view_sales")
    image_url = scrapy.Field()  # thumbnail URL ("pic_url")

    def get_insert_sql(self):
        """Build a parameterized INSERT for the ``taobao`` table.

        :returns: ``(sql, data)`` — the statement with ``%s`` placeholders
                  and the matching value tuple, ready for the DB helper.
        """
        sql = ('INSERT INTO taobao(title, price, sales_num, image_url) '
               'VALUES (%s, %s, %s, %s)')
        values = (self['title'], self['price'], self['sales_num'], self['image_url'])
        return (sql, values)

pipelines.py

mysqlhelper是准备好的脚本
需要在settings.py的ITEM_PIPELINES配置中添加 'Day10zuoye.pipelines.StoreMysqlScrapyPipeline': 300,

from Day10zuoye.mysqlhelper import MysqlHelper

class Day10ZuoyePipeline(object):
    """Default no-op pipeline (scrapy project boilerplate)."""

    def process_item(self, item, spider):
        """Pass the item through unchanged so downstream pipelines see it."""
        return item

class StoreMysqlScrapyPipeline(object):
    """Pipeline that persists each scraped item into MySQL."""

    def process_item(self, item, spider):
        """Insert the item via its own SQL, then hand it to the next pipeline.

        The item supplies its statement through ``get_insert_sql()`` so this
        pipeline stays agnostic of the table layout.
        """
        insert_sql, data = item.get_insert_sql()
        # NOTE(review): a fresh MysqlHelper is built per item — if the helper
        # opens a new connection each time, consider creating it once in
        # open_spider(); confirm against the MysqlHelper implementation.
        helper = MysqlHelper()
        helper.execute_modify_sql(insert_sql, data)
        return item

你可能感兴趣的:(Scrapy--淘宝--[get]--[加入数据库])