# Build an .xls workbook whose text is encoded as UTF-8.
workBook = xlwt.Workbook(encoding='utf-8')

# Add the single worksheet that holds the job listing.
sheet = workBook.add_sheet('python职位表')

# Row 0 is the header row, row 1 is one sample record.
table_rows = (
    ('职位名称', '公司名称'),
    ('python开发', '网易'),
)
for row_index, row_values in enumerate(table_rows):
    for col_index, cell_text in enumerate(row_values):
        sheet.write(row_index, col_index, cell_text)

# Persist the workbook to disk.
# NOTE(review): 'Phthon' looks like a typo for 'Python'; it is left untouched
# because it is the output file name other code may rely on.
workBook.save('Phthon职位介绍表.xls')
(1)数据写到本地txt文本文件
import os
# Append the digits 0-9 to num.txt.
# The context manager guarantees the file is flushed and closed even if a
# write fails (the original never closed the handle).
with open('num.txt', 'a', encoding='utf-8') as f:
    for i in range(10):
        # A text-mode file only accepts str; writing the int directly
        # raises TypeError, so convert first.
        f.write(str(i))
(2)数据写到csv格式文件
import csv
# Append rows to test1.csv.
# newline='' hands line-ending control to the csv module; the explicit
# encoding keeps the output independent of the platform default.
with open("test1.csv", "a+", newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    # Seek to the end so tell() reflects whether the file already has content.
    csvfile.seek(0, 2)
    if csvfile.tell() == 0:
        # Write the column names only once; because the file is opened in
        # append mode, writing them unconditionally would repeat the header
        # in the middle of the data on every run.
        writer.writerow(["index", "a_name", "b_name"])
    # writerows() writes several rows in one call.
    writer.writerows([[0, 1, 3], [1, 2, 3], [2, 3, 4]])
(1)数据写入文件型数据库sqlite3数据库
import sqlite3
class DBManager(object):
    """Class-level helper around one shared SQLite connection.

    State lives in the class attributes ``connect`` and ``cursor``, so every
    method is a classmethod and no instance is needed.  Call
    ``create_db_and_table()`` before any other method.
    """

    # Shared sqlite3 connection; None until create_db_and_table() runs.
    connect = None
    # Shared cursor created from ``connect``; None until create_db_and_table() runs.
    cursor = None

    @classmethod
    def create_db_and_table(cls):
        """Open (creating if needed) the ``dataDB`` file and ensure ``tableName`` exists."""
        cls.connect = sqlite3.connect('dataDB')
        cls.cursor = cls.connect.cursor()
        # Call execute() on the cursor; the original assigned the result of an
        # undefined global ``execute`` over the cursor and misspelled "create".
        cls.cursor.execute(
            'create table if not exists tableName(name text, content text)'
        )
        cls.connect.commit()

    @classmethod
    def insert_info_to_table(cls, name, content):
        """Insert one ``(name, content)`` row into ``tableName`` and commit.

        Args:
            name: Value stored in the ``name`` column.
            content: Value stored in the ``content`` column.
        """
        # Bound parameters instead of str.format(): values containing quotes
        # no longer break the statement or allow SQL injection.
        cls.cursor.execute(
            'insert into tableName(name, content) values(?, ?)',
            (name, content),
        )
        cls.connect.commit()

    @classmethod
    def close_db(cls):
        """Close the shared cursor and then the shared connection."""
        cls.cursor.close()
        cls.connect.close()

    @classmethod
    def select_from_table(cls):
        """Return every row whose ``name`` has at least one character.

        Returns:
            A list of ``(name, content)`` tuples.
        """
        # Query the table this class actually creates (the original read from
        # a non-existent ``tbTable``).  LIKE '%_' matches the same names as
        # the original '%%_' pattern.
        cls.cursor.execute(
            "select name, content from tableName where name LIKE '%_'"
        )
        return cls.cursor.fetchall()
(2)数据写入sqlite3数据库(Scrapy管道示例;原标题写作mysql,但下方代码实际使用的是sqlite3)
import sqlite3
class TaobaospiderPipeline(object):
    """Item pipeline that stores each item's name and price in SQLite."""

    def __init__(self):
        """Open the ``taobaoDB`` database file and ensure ``taobaoTable`` exists."""
        self.connect = sqlite3.connect('taobaoDB')
        self.cursor = self.connect.cursor()
        self.cursor.execute(
            'create table if not exists taobaoTable (name text, price text)'
        )

    def process_item(self, item, spider):
        """Insert ``item['name']`` and ``item['price']`` as one row and commit.

        Args:
            item: Mapping-like object providing the ``name`` and ``price`` keys.
            spider: Not used by this method.

        Returns:
            The same ``item`` object, unchanged.
        """
        # Bound parameters instead of str.format(): the values come from
        # scraped data, and a quote inside them would otherwise break the
        # statement or allow SQL injection.
        self.cursor.execute(
            'insert into taobaoTable (name, price) VALUES (?, ?)',
            (item['name'], item['price']),
        )
        self.connect.commit()
        return item

    def close_spider(self, spider):
        """Close the cursor and then the connection.

        Args:
            spider: Not used by this method.
        """
        self.cursor.close()
        self.connect.close()
(3)数据写入到MongoDB非关系型数据库中
from scrapy.conf import settings#主机号,端口号,数据库名称由配置文件中引入
import pymongo  # MongoDB driver package
class DoubanspiderPipeline(object):
    """Item pipeline that stores each item as a document in MongoDB."""

    def __init__(self):
        """Connect to MongoDB using values read from ``settings``.

        Reads the ``MONGODB_HOST``, ``MONGODB_PORT``, ``MONGODB_DBNAME`` and
        ``MONGODB_DOCNAME`` keys and keeps the target collection in
        ``self.post``.
        """
        # Host, port and database name come from the settings object.
        host = settings['MONGODB_HOST']
        port = settings['MONGODB_PORT']
        dbname = settings['MONGODB_DBNAME']
        # Create the MongoDB client.
        client = pymongo.MongoClient(host=host, port=port)
        # Select the configured database.
        mdb = client[dbname]
        # Select the collection the items are written to.
        self.post = mdb[settings['MONGODB_DOCNAME']]

    def process_item(self, item, spider):
        """Insert ``item`` into the collection as one document.

        Args:
            item: Object convertible with ``dict(item)``.
            spider: Not used by this method.

        Returns:
            The same ``item`` object, unchanged.
        """
        # Convert to a plain dict before handing it to the driver.
        data = dict(item)
        # insert_one() replaces Collection.insert(), which is deprecated in
        # PyMongo 3 and removed in PyMongo 4.
        self.post.insert_one(data)
        return item