Playground for Python - 数据存储

上一篇文章介绍了如何从网页上抓取数据, 组合成想要的数据结构,这篇我们将讲解如何把数据持久化存储到MySQL。

环境安装

brew install mysql

这里推荐大家一款好用的数据库管理工具:DataGrip,可以方便的编辑修改数据库中的数据。

包安装

安装MySQL对应的Python库:

pip install MySQL-python

工具类准备

Python对MySQL操作起来还是比较繁琐的,这里用到了一个工具类dbMySQLUtils.py

import MySQLdb  
  
class DBMySQLUtils:
    myVersion=0.1  
    def __init__(self,host,user,password,charset="utf8"):  
        self.host=host  
        self.user=user  
        self.password=password  
        self.charset=charset  
        try:  
            self.conn=MySQLdb.connect(host=self.host,user=self.user,passwd=self.password)  
            self.conn.set_character_set(self.charset)  
            self.cursor=self.conn.cursor()  
        except MySQLdb.Error as e:  
            print ('MySql Error : %d %s' %(e.args[0],e.args[1]))  
      
    def setDB(self,db):  
        try:  
            self.conn.select_db(db)  
        except MySQLdb.Error as e:  
            print ('MySql Error : %d %s' %(e.args[0],e.args[1]))  
      
    def query(self,sql):  
        try:  
            rows=self.cursor.execute(sql)  
            return rows;  
        except MySQLdb.Error as e:  
            print('MySql Error: %s SQL: %s'%(e,sql))  
              
    def queryOnlyRow(self,sql):  
        try:  
            self.query(sql)  
            result=self.cursor.fetchone()  
            desc=self.cursor.description  
            row={}  
            for i in range(0,len(result)):  
                row[desc[i][0]]=result[i]  
            return row;  
        except MySQLdb.Error as e:  
            print('MySql Error: %s SQL: %s'%(e,sql))  
      
    def queryAll(self,sql):  
        try:  
            self.query(sql)  
            result=self.cursor.fetchall()  
            desc=self.cursor.description  
            rows=[]  
            for cloumn in result:  
                row={}  
                for i in range(0,len(cloumn)):  
                    row[desc[i][0]]=cloumn[i]  
                rows.append(row)    
            return rows;  
        except MySQLdb.Error as e:  
            print('MySql Error: %s SQL: %s'%(e,sql))  
      
    def insert(self,tableName,data):  
        try:  
            newData={}  
            for key in data:  
                newData[key]="'"+data[key]+"'"  
            key=','.join(newData.keys())  
            value=','.join(newData.values())  
            sql="insert into "+tableName+"("+key+") values("+value+")"  
            self.query("set names 'utf8'")
            self.query(sql)
            self.commit()
        except MySQLdb.Error as e:
            self.conn.rollback()
            print('MySql Error: %s %s'%(e.args[0],e.args[1]))  
        finally:  
            self.close()

    def insertList(self,tableName,dataList):  
        try:  
            for data in dataList:
                newData={}  
                for key in data:  
                    newData[key]="'"+data[key]+"'"  
                key=','.join(newData.keys())  
                value=','.join(newData.values())  
                sql="insert into "+tableName+"("+key+") values("+value+")"  
                self.query("set names 'utf8'")
                self.query(sql)
                self.commit()
        except MySQLdb.Error as e:
            self.conn.rollback()
            print('MySql Error: %s %s'%(e.args[0],e.args[1]))  
        finally:  
            self.close()
      
    def update(self,tableName,data,whereData):  
        try:  
            newData=[]  
            keys=data.keys()  
            for i in keys:  
                item="%s=%s"%(i,"'""'"+data[i]+"'")  
                newData.append(item)  
            items=','.join(newData)  
            newData2=[]  
            keys=whereData.keys()  
            for i in keys:  
                item="%s=%s"%(i,"'""'"+whereData[i]+"'")  
                newData2.append(item)  
            whereItems=" AND ".join(newData2)  
            sql="update "+tableName+" set "+items+" where "+whereItems  
            self.query("set names 'utf8'")  
            self.query(sql)  
            self.commit()  
        except MySQLdb.Error as e:  
            self.conn.rollback()  
            print('MySql Error: %s %s'%(e.args[0],e.args[1]))  
        finally:  
            self.close()  
      
    def getLastInsertRowId(self):  
        return self.cursor.lastrowid  
      
    def getRowCount(self):  
        return self.cursor.rowcount  
      
    def commit(self):  
        self.conn.commit()  
      
    def close(self):  
        self.cursor.close()  
        self.conn.close()  

存储房源信息

然后就是专门写个类往数据库读写数据:

import sdk.dbMySQLUtils as dbMySQLUtils

def store(list):
    dbHelper = dbMySQLUtils.DBMySQLUtils("localhost", "root", "123456")
    dbHelper.setDB("homelink")
    dbHelper.insertList("homelist", list)
    return

def query():
    dbHelper = dbMySQLUtils.DBMySQLUtils("localhost", "root", "123456")
    dbHelper.setDB("homelink")
    sql = "select * from homelist"
    rows = dbHelper.queryAll(sql)
    return rows

这里把数据库的信息替换成你自己的,我事先创建了homelink数据库,homelist表来存储上一章获取到的房源信息列表数据。homelist表结构如下:

create table homelist
(
    id int auto_increment
        primary key,
    hid varchar(100) not null,
    image varchar(500) not null,
    title varchar(500) not null,
    url varchar(500) not null,
    region varchar(100) null,
    zone varchar(100) null,
    meters varchar(100) null,
    direction varchar(100) null,
    area varchar(100) null,
    floor varchar(100) null,
    history varchar(100) null,
    subway varchar(100) null,
    hasKey varchar(100) null,
    decoration varchar(100) null,
    heating varchar(100) null,
    price varchar(100) null,
    priceUnit varchar(100) null,
    updateTime varchar(100) null,
    visited varchar(100) null,
    visitedAction varchar(100) null,
    createTime int not null,
    constraint homelist_id_uindex
        unique (id),
    constraint homelist_hid_uindex
        unique (hid)
)
engine=InnoDB
;

先获取到上一章方法提供的数据list,然后存储起来:

list = parseHtml.getList(1)
storeToDB.store(list)

数据就成功存储到数据库中了:


你可能感兴趣的:(Playground for Python - 数据存储)