数据清洗-> 数据入库-> 数据可视化 的 简单项目

数据从同事那里拿来,大约 60 万条、几百 MB,是市面上某款保险柜的数据,现在要对这批数据进行分析。

数据清洗:略

数据入库:略

数据可视化:

#!/usr/bin/python3
 
import pymysql


# Message types expected by the business side. This list is passed to
# fill_null_type() so that types absent from the query result get a count.
type_list = ["userInfoSync","alertReport","changeNetwork","closeDoor","dataSync","deleteFP","dynPwd","dynPwdSecond",
             "formatDevice","heartbeat","lock_activation","network","openDoor","readStatus","regFP","resetting",
             "setCtlPwd","updateFirmware"]


def get_type_counts():
    """Count the rows of the ``dictionary`` table per message type.

    Runs a ``GROUP BY type`` query that leaves out rows whose type equals
    the string ``'NULL'`` or ``'networkStatus'``.

    Returns:
        dict: maps each type (first result column) to its row count
        (second result column). If the query fails, the error is printed
        and whatever was collected so far (normally an empty dict) is
        returned, so callers keep working on a best-effort basis.
    """
    config = {
        "mysql_config": {
            "host": "***",
            "user": "***",
            "password": "***",
            "database": "***",
        }
    }
    mysql_config = config["mysql_config"]

    sql = "SELECT type,count(*) as freq  FROM dictionary WHERE type != 'NULL' and type != 'networkStatus' group by type ;"

    type_counts_dict = {}

    # Keyword arguments are required: PyMySQL 1.0 made connect()
    # keyword-only, so the old positional form raises TypeError there.
    db = pymysql.connect(
        host=mysql_config["host"],
        user=mysql_config["user"],
        password=mysql_config["password"],
        database=mysql_config["database"],
        charset='utf8',
    )
    try:
        with db.cursor() as cursor:
            cursor.execute(sql)
            for type_name, freq in cursor.fetchall():
                type_counts_dict[type_name] = freq
    except pymysql.MySQLError as err:
        # Keep the original best-effort behaviour, but show the cause.
        print("Error: unable to fetch data", err)
    finally:
        # Always release the connection, even on unexpected errors.
        db.close()
    return type_counts_dict

def fill_null_type(type_counts_dict, type_list):
    """Give every expected type that is missing from the counts a count of 0.

    The comparison is done on the actual keys rather than on the number of
    keys, so a missing type is still filled in when the data also contains
    types that are not in ``type_list``.

    Args:
        type_counts_dict: mapping of type -> count; it is updated in place.
        type_list: iterable of the types the business side expects.

    Returns:
        The same ``type_counts_dict`` object, with missing types set to 0.
    """
    expected = set(type_list)
    # Collected before filling so that only types coming from the data count.
    unexpected = [key for key in type_counts_dict if key not in expected]

    null_type = []
    for name in type_list:
        if name not in type_counts_dict:
            type_counts_dict[name] = 0
            null_type.append(name)

    if null_type:
        print(null_type)
    if unexpected:
        print("Error: Data type is larger than business type!!!")
    if not null_type and not unexpected:
        print("Info: Data type is equals  business type!!!")
    return type_counts_dict

def data_visualization(type_counts_dict):
    """Show a horizontal bar chart of the record count for each type.

    Bars are ordered by count, largest first; because ``barh`` draws from
    the bottom up, the largest bar ends up at the bottom of the chart.

    Args:
        type_counts_dict: mapping of type name -> count. An empty mapping
            produces an empty chart instead of raising.
    """
    import matplotlib.pyplot as plt
    import matplotlib

    matplotlib.rcParams['font.sans-serif'] = ['SimHei']
    matplotlib.rcParams['axes.unicode_minus'] = False

    # Sort (count, name) pairs in descending order.
    ranked = sorted(zip(type_counts_dict.values(), type_counts_dict.keys()), reverse=True)
    datas = [count for count, _ in ranked]
    type_name = [name for _, name in ranked]

    positions = range(len(datas))
    # barh takes the y positions first, then the bar lengths.
    plt.barh(positions, datas, height=0.5, color='steelblue', alpha=0.8)
    plt.yticks(positions, type_name)
    # default=0 keeps an empty input from raising ValueError in max().
    max_datas = max(datas, default=0)
    plt.xlim(0, max_datas + 1000)
    plt.xlabel("Data Proportion")
    plt.title("Different types of data volume")
    # Write each count just to the right of the end of its bar.
    for position, count in enumerate(datas):
        plt.text(count + 1/2, position - 0.1, '%s' % count)
    plt.show()


# Fetch the per-type counts
type_counts_dict = get_type_counts()
# Add the business-required types that are absent from the data
type_counts_dict = fill_null_type(type_counts_dict,type_list)   
# Show the result
data_visualization(type_counts_dict)

横条形图

#!/usr/bin/python3
 
import pymysql
import json

  

#获取数据
def get_type_counts():
    """Count ``openDoor`` messages per open-door type.

    Reads the ``msg`` column of every ``dictionary`` row whose type is
    ``'openDoor'``, parses it as JSON and tallies the value found under
    ``["data"]["openDoorType"]``.

    Returns:
        dict: maps each ``openDoorType`` value to its number of messages.
        Messages that cannot be parsed are skipped and reported in a single
        warning; a database error is printed and the counts collected so
        far are returned.
    """
    config = {
        "mysql_config": {
            "host": "****",
            "user": "***",
            "password": "***.***",
            "database": "****",
        }
    }
    mysql_config = config["mysql_config"]

    sql = "SELECT  msg  FROM dictionary WHERE type = 'openDoor';"

    open_Doortype_counts_dict = {}
    skipped = 0

    # Keyword arguments are required: PyMySQL 1.0 made connect()
    # keyword-only, so the old positional form raises TypeError there.
    db = pymysql.connect(
        host=mysql_config["host"],
        user=mysql_config["user"],
        password=mysql_config["password"],
        database=mysql_config["database"],
        charset='utf8',
    )
    try:
        with db.cursor() as cursor:
            cursor.execute(sql)
            for row in cursor.fetchall():
                try:
                    # Use the column value itself; slicing the repr of the
                    # row tuple breaks on messages with quotes/backslashes.
                    msg = row[0]
                    if isinstance(msg, (bytes, bytearray)):
                        msg = msg.decode("utf-8")
                    # Drop surrounding whitespace as well as a literal
                    # backslash-n suffix, which the old parsing also removed.
                    msg = msg.strip("\\n\r\n\t ")
                    open_Doortype = json.loads(msg)["data"]["openDoorType"]
                    open_Doortype_counts_dict[open_Doortype] = (
                        open_Doortype_counts_dict.get(open_Doortype, 0) + 1
                    )
                except (AttributeError, TypeError, ValueError, KeyError):
                    # One malformed message must not abort the whole count.
                    skipped += 1
    except pymysql.MySQLError as err:
        # Keep the original best-effort behaviour, but show the cause.
        print("Error: unable to fetch data", err)
    finally:
        # Always release the connection, even on unexpected errors.
        db.close()

    if skipped:
        print("Warning: skipped %d malformed openDoor message(s)" % skipped)
    return open_Doortype_counts_dict


# Fetch the data
open_Doortype_counts_dict = get_type_counts()

# Sample output of print(open_Doortype_counts_dict):
# {'3': 2191, '1': 1275}





#填充数据
def fill_null_type(open_Doortype_counts_dict, type_list=("0", "1", "2", "3", "4")):
    """Give every expected open-door type that is missing a count of 0.

    The comparison is done on the actual keys rather than on the number of
    keys, and the function always returns the mapping it was given.

    Args:
        open_Doortype_counts_dict: mapping of open-door type -> count; it
            is updated in place.
        type_list: the expected open-door types. Defaults to the five
            codes "0" to "4".

    Returns:
        The same ``open_Doortype_counts_dict`` object, with missing types
        set to 0.
    """
    expected = set(type_list)
    # Collected before filling so that only types coming from the data count.
    unexpected = [key for key in open_Doortype_counts_dict if key not in expected]

    null_type = []
    for name in type_list:
        if name not in open_Doortype_counts_dict:
            open_Doortype_counts_dict[name] = 0
            null_type.append(name)

    if null_type:
        print(null_type)
    if unexpected:
        print("Error: Data type is larger than business type!!!")
    if not null_type and not unexpected:
        print("Info: Data type is equals  business type!!!")
    return open_Doortype_counts_dict
# Fill in the missing open-door types
open_Doortype_counts_dict = fill_null_type(open_Doortype_counts_dict)

#数据可视化
def data_visualization(open_Doortype_counts_dict):
    """Show a pie chart of how often each open-door type occurs.

    Type codes are translated to display names; codes without a known
    display name are left out of the chart.

    Args:
        open_Doortype_counts_dict: mapping of open-door type code
            ("0" to "4") -> number of messages.
    """
    import matplotlib.pyplot as plt

    fig, ax = plt.subplots(figsize=(9, 20), subplot_kw=dict(aspect="equal"))

    open_Doortype_name_dict = {
        '0': "Bluetooth opening",
        '1': "Open the door remotely",
        '2': "Password open",
        '3': "Fingerprint opening",
        '4': "Dynamic cipher",
    }

    # Translate codes to display names, e.g. '0' -> "Bluetooth opening".
    type_name = []
    datas = []
    for code, count in open_Doortype_counts_dict.items():
        if code in open_Doortype_name_dict:
            type_name.append(open_Doortype_name_dict[code])
            datas.append(count)

    total = sum(datas)

    def func(pct):
        # Round instead of truncating: the percentage is a float, so the
        # reconstructed count can land just below the true integer.
        absolute = int(round(pct / 100. * total))
        return "{:.1f}%\n({:d} )".format(pct, absolute)

    wedges, texts, autotexts = ax.pie(datas, autopct=func,
                                      textprops=dict(color="w"))

    # Legend placed to the right of the pie.
    ax.legend(wedges, type_name,
              title="Open door type",
              loc="center left",
              bbox_to_anchor=(1, 0, 0.5, 0.5))
    # Text drawn on the wedges.
    plt.setp(autotexts, size=20, weight="bold")
    ax.set_title("Open Door Type Proportion", size=20)

    plt.show()
    
# Render the pie chart of open-door types
data_visualization(open_Doortype_counts_dict)

 

 

 

就先这样吧。

 

 

 

 

你可能感兴趣的:(个人日记)