场景一:输出到文件中
spiders/test.py
def parse(self, response):
    """Extract the page title from *response* as a text string.

    NOTE(review): this excerpt only evaluates the extraction and discards
    the value; a real spider callback would presumably put it into an item
    and yield it -- confirm against the full spider.
    """
    # Important (1): the extracted title is used as-is -- there is no
    # ``.encode()`` call in the file-output scenario.
    # ``extract()`` returns a list of matches, so ``[0]`` raises IndexError
    # when the selector matches nothing.
    response.css('title::text').extract()[0]
pipelines.py
import json
import codecs
class TestPipeline(object):
    """Item pipeline that appends every item to ``test.json``.

    Each item is written as one JSON document per line, encoded as UTF-8,
    with non-ASCII characters kept readable instead of ``\\uXXXX``-escaped.
    """

    def open_spider(self, spider):
        """Open (and truncate) the output file.

        :param spider: the spider being opened (unused).
        """
        # Important (2): open the file through ``codecs`` with an explicit
        # UTF-8 encoding so that text written later is encoded on the way
        # out, independent of the platform default encoding.
        self.file = codecs.open('test.json', 'w', encoding='utf-8')

    def close_spider(self, spider):
        """Close the output file.

        :param spider: the spider being closed (unused).
        """
        self.file.close()

    def process_item(self, item, spider):
        """Write *item* as a single JSON line and hand it on unchanged.

        :param item: the scraped item; anything ``dict()`` accepts.
        :param spider: the spider that produced the item (unused).
        :returns: the same *item*, so later pipelines still receive it.
        """
        # Important (3): ``ensure_ascii=False`` makes ``json.dumps`` emit
        # non-ASCII characters literally rather than as \uXXXX escapes.
        line = json.dumps(dict(item), ensure_ascii=False) + "\n"
        self.file.write(line)
        return item
场景二:写入MySQL数据库
spiders/test.py
def parse(self, response):
    """Extract the page title from *response* and encode it as UTF-8.

    NOTE(review): this excerpt only evaluates the expression and discards
    the value; a real spider callback would presumably store it in an item
    (the pipeline in this scenario reads ``item['name']``) -- confirm.
    """
    # Important (1): in the database scenario the title is explicitly
    # encoded to UTF-8 before it is handed on.
    # NOTE(review): on Python 3 ``.encode`` yields ``bytes``; whether the
    # MySQL driver wants bytes or text depends on the driver and connection
    # charset -- confirm before copying this pattern.
    # ``extract()`` returns a list of matches, so ``[0]`` raises IndexError
    # when the selector matches nothing.
    response.css('title::text').extract()[0].encode('utf-8')
pipelines.py
import json
import codecs
class TestPipeline(object):
    """Item pipeline that inserts each item's ``name`` into table ``tmp1``."""

    def open_spider(self, spider):
        """Set up the database handle.

        :param spider: the spider being opened (unused).
        """
        # The original note elides the connection setup ("self.db = ......").
        # TODO: create the MySQL connection here (and the ``write_db``
        # cursor used by ``process_item``, if it is a separate object).
        self.db = None

    def close_spider(self, spider):
        """Release the database handle.

        :param spider: the spider being closed (unused).
        """
        # The original note elides this call ("self.db......").
        # NOTE(review): presumably it closes (and maybe commits) the
        # connection -- confirm against the real teardown code.
        if self.db is not None:
            self.db.close()

    def process_item(self, item, spider):
        """Insert ``item['name']`` into ``tmp1`` and hand the item on.

        :param item: the scraped item; must provide a ``'name'`` key.
        :param spider: the spider that produced the item (unused).
        :returns: the same *item*, so later pipelines still receive it.
        :raises KeyError: if *item* has no ``'name'`` entry.
        """
        sql = """insert into tmp1 (name) values (%s)"""
        # Important (2): pass the value as a query parameter rather than
        # formatting it into the SQL string. The parameters must be a
        # sequence, hence the trailing comma: ``(x)`` is just ``x``, while
        # ``(x,)`` is a one-element tuple.
        # NOTE(review): ``open_spider`` assigns ``self.db`` but the statement
        # is executed on ``self.write_db``; confirm which attribute holds
        # the cursor. No commit is issued here either -- verify autocommit.
        self.write_db.execute(sql, (item['name'],))
        return item