上一篇文章介绍了如何从网页上抓取数据, 组合成想要的数据结构,这篇我们将讲解如何把数据持久化存储到MySQL。
环境安装
brew install mysql
这里推荐大家一款好用的数据库管理工具:DataGrip,可以方便的编辑修改数据库中的数据。
包安装
安装MySQL对应的Python库:
pip install MySQL-python
工具类准备
Python对MySQL操作起来还是比较繁琐的,这里用到了一个工具类dbMySQLUtils.py
:
import MySQLdb
class DBMySQLUtils:
myVersion=0.1
def __init__(self,host,user,password,charset="utf8"):
self.host=host
self.user=user
self.password=password
self.charset=charset
try:
self.conn=MySQLdb.connect(host=self.host,user=self.user,passwd=self.password)
self.conn.set_character_set(self.charset)
self.cursor=self.conn.cursor()
except MySQLdb.Error as e:
print ('MySql Error : %d %s' %(e.args[0],e.args[1]))
def setDB(self,db):
try:
self.conn.select_db(db)
except MySQLdb.Error as e:
print ('MySql Error : %d %s' %(e.args[0],e.args[1]))
def query(self,sql):
try:
rows=self.cursor.execute(sql)
return rows;
except MySQLdb.Error as e:
print('MySql Error: %s SQL: %s'%(e,sql))
def queryOnlyRow(self,sql):
try:
self.query(sql)
result=self.cursor.fetchone()
desc=self.cursor.description
row={}
for i in range(0,len(result)):
row[desc[i][0]]=result[i]
return row;
except MySQLdb.Error as e:
print('MySql Error: %s SQL: %s'%(e,sql))
def queryAll(self,sql):
try:
self.query(sql)
result=self.cursor.fetchall()
desc=self.cursor.description
rows=[]
for cloumn in result:
row={}
for i in range(0,len(cloumn)):
row[desc[i][0]]=cloumn[i]
rows.append(row)
return rows;
except MySQLdb.Error as e:
print('MySql Error: %s SQL: %s'%(e,sql))
def insert(self,tableName,data):
try:
newData={}
for key in data:
newData[key]="'"+data[key]+"'"
key=','.join(newData.keys())
value=','.join(newData.values())
sql="insert into "+tableName+"("+key+") values("+value+")"
self.query("set names 'utf8'")
self.query(sql)
self.commit()
except MySQLdb.Error as e:
self.conn.rollback()
print('MySql Error: %s %s'%(e.args[0],e.args[1]))
finally:
self.close()
def insertList(self,tableName,dataList):
try:
for data in dataList:
newData={}
for key in data:
newData[key]="'"+data[key]+"'"
key=','.join(newData.keys())
value=','.join(newData.values())
sql="insert into "+tableName+"("+key+") values("+value+")"
self.query("set names 'utf8'")
self.query(sql)
self.commit()
except MySQLdb.Error as e:
self.conn.rollback()
print('MySql Error: %s %s'%(e.args[0],e.args[1]))
finally:
self.close()
def update(self,tableName,data,whereData):
try:
newData=[]
keys=data.keys()
for i in keys:
item="%s=%s"%(i,"'""'"+data[i]+"'")
newData.append(item)
items=','.join(newData)
newData2=[]
keys=whereData.keys()
for i in keys:
item="%s=%s"%(i,"'""'"+whereData[i]+"'")
newData2.append(item)
whereItems=" AND ".join(newData2)
sql="update "+tableName+" set "+items+" where "+whereItems
self.query("set names 'utf8'")
self.query(sql)
self.commit()
except MySQLdb.Error as e:
self.conn.rollback()
print('MySql Error: %s %s'%(e.args[0],e.args[1]))
finally:
self.close()
def getLastInsertRowId(self):
return self.cursor.lastrowid
def getRowCount(self):
return self.cursor.rowcount
def commit(self):
self.conn.commit()
def close(self):
self.cursor.close()
self.conn.close()
存储房源信息
然后就是专门写个类往数据库读写数据:
import sdk.dbMySQLUtils as dbMySQLUtils
def store(list):
dbHelper = dbMySQLUtils.DBMySQLUtils("localhost", "root", "123456")
dbHelper.setDB("homelink")
dbHelper.insertList("homelist", list)
return
def query():
dbHelper = dbMySQLUtils.DBMySQLUtils("localhost", "root", "123456")
dbHelper.setDB("homelink")
sql = "select * from homelist"
rows = dbHelper.queryAll(sql)
return rows
这里把数据库的信息替换成你自己的,我事先创建了homelink数据库,homelist表来存储上一章获取到的房源信息列表数据。homelist表结构如下:
create table homelist
(
id int auto_increment
primary key,
hid varchar(100) not null,
image varchar(500) not null,
title varchar(500) not null,
url varchar(500) not null,
region varchar(100) null,
zone varchar(100) null,
meters varchar(100) null,
direction varchar(100) null,
area varchar(100) null,
floor varchar(100) null,
history varchar(100) null,
subway varchar(100) null,
hasKey varchar(100) null,
decoration varchar(100) null,
heating varchar(100) null,
price varchar(100) null,
priceUnit varchar(100) null,
updateTime varchar(100) null,
visited varchar(100) null,
visitedAction varchar(100) null,
createTime int not null,
constraint homelist_id_uindex
unique (id),
constraint homelist_hid_uindex
unique (hid)
)
engine=InnoDB
;
先获取到上一章方法提供的数据list,然后存储起来:
list = parseHtml.getList(1)
storeToDB.store(list)
数据就成功存储到数据库中了: