pipelines.py文件中新建MySQLPipeline类:
# 导入库
from scrapy.utils.project import get_project_settings
import pymysql
# 写入数据库
class MySQLPipeline(object):
    """Scrapy item pipeline that persists scraped items into a MySQL table.

    Connection parameters (DB_HOST, DB_PORT, DB_USER, DB_PASSWORD, DB_NAME,
    DB_CHARSET) are read from the project's settings.py via
    get_project_settings().
    """

    def connect_db(self):
        """Read DB settings from settings.py and open the connection/cursor."""
        settings = get_project_settings()
        self.host = settings['DB_HOST']
        self.port = settings['DB_PORT']
        self.user = settings['DB_USER']
        self.password = settings['DB_PASSWORD']
        self.name = settings['DB_NAME']
        self.charset = settings['DB_CHARSET']
        # Open the MySQL connection.
        self.conn = pymysql.connect(
            host=self.host,
            port=self.port,
            user=self.user,
            password=self.password,
            db=self.name,  # database name
            charset=self.charset,
        )
        # Cursor object used to execute SQL statements.
        self.cursor = self.conn.cursor()

    def open_spider(self, spider):
        """Called once when the spider opens: establish the DB connection."""
        self.connect_db()

    def close_spider(self, spider):
        """Called once when the spider closes: release cursor and connection."""
        self.cursor.close()
        self.conn.close()

    def process_item(self, item, spider):
        """Insert one item into the `book` table and pass the item along.

        The original built the SQL with %-string formatting into quoted
        literals, which breaks on any value containing quotes and is an
        SQL-injection vector. A parameterized query lets the driver do
        the escaping.
        """
        # Adjust the columns/values here to match your item fields.
        sql = 'insert into book (title, img_url) values (%s, %s)'
        self.cursor.execute(sql, (item['title'], item['img_url']))
        # Commit explicitly, otherwise the transaction is rolled back
        # on close and the table stays empty.
        self.conn.commit()
        return item
设置settings.py文件,开启ITEM_PIPELINES,并设置数据库相关信息:
# Enable both pipelines; a lower number means the pipeline runs earlier.
ITEM_PIPELINES = {
'dushuProject.pipelines.DushuprojectPipeline': 300,
'dushuProject.pipelines.MySQLPipeline': 200, # slightly higher priority so it runs first
}
# Database connection settings read by MySQLPipeline.connect_db().
# Replace the placeholder values with your real credentials.
DB_HOST = 'localhost'
DB_PORT = 3306
DB_USER = '数据库用户名'
DB_PASSWORD = '数据库密码'
DB_NAME = '数据库名'
DB_CHARSET = 'utf8'