# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
# useful for handling different item types with a single interface
from itemadapter import ItemAdapter
import json
import csv
class GushiwenPipeline:
    """Scrapy item pipeline that saves each scraped poem as one JSON line.

    Output goes to ``gsw.txt`` in JSON-lines format. An item looks like::

        {'author': '王建',
         'content': '精卫谁教尔填海...',
         'dynasty': '〔唐代〕',
         'title': '精卫词'}
    """

    def open_spider(self, spider):
        """Open the output file once when the spider starts."""
        # newline='' prevents platform-specific newline translation.
        self.f = open('gsw.txt', 'w', encoding='utf-8', newline='')

    def process_item(self, item, spider):
        """Write *item* to the file as one JSON line and pass it through.

        *item* is an Item instance (e.g. ``GushiwenItem()``), not a plain
        dict, so wrap it in ``dict()`` before serializing with
        ``json.dumps``. ``ensure_ascii=False`` keeps the Chinese text
        human-readable in the output file instead of \\uXXXX escapes.
        """
        item_json = json.dumps(dict(item), ensure_ascii=False)
        self.f.write(item_json + '\n')
        # Returning the item lets later pipelines (if any) process it too.
        return item

    def close_spider(self, spider):
        """Close the output file when the spider finishes."""
        self.f.close()