数据从同事那里拿来,大概60万条,几百MB,是市面上某款保险柜的数据,现在要分析这批数据。
数据清洗:略
数据入库:略
数据可视化:
#!/usr/bin/python3
import pymysql
# Message types the business expects to see. It is passed to fill_null_type()
# so that expected types absent from the query result get a count of zero.
type_list = [
    "userInfoSync",
    "alertReport",
    "changeNetwork",
    "closeDoor",
    "dataSync",
    "deleteFP",
    "dynPwd",
    "dynPwdSecond",
    "formatDevice",
    "heartbeat",
    "lock_activation",
    "network",
    "openDoor",
    "readStatus",
    "regFP",
    "resetting",
    "setCtlPwd",
    "updateFirmware",
]
def get_type_counts():
    """Return the number of rows per message type in the ``dictionary`` table.

    Connects to MySQL with the credentials embedded below and runs one
    ``GROUP BY type`` query that skips the values ``'NULL'`` and
    ``'networkStatus'``.

    Returns:
        dict: ``{type: row_count}`` built from the query result.  If the query
        fails, the error is printed and whatever was collected so far
        (possibly an empty dict) is returned.

    Raises:
        pymysql.MySQLError: if the connection itself cannot be opened (the
        connect call is deliberately outside the ``try`` block).
    """
    config = {
        "mysql_config": {
            "host": "***",
            "user": "***",
            "password": "***",
            "database": "***"
        }
    }
    mysql_config = config["mysql_config"]
    type_counts_dict = {}

    # Keyword arguments are required by PyMySQL 1.0+; the positional form
    # (host, user, password, database) is rejected there.
    db = pymysql.connect(
        host=mysql_config["host"],
        user=mysql_config["user"],
        password=mysql_config["password"],
        database=mysql_config["database"],
        charset='utf8',
    )

    # NOTE(review): the filter compares against the *string* 'NULL'; confirm
    # that is what the cleaned data contains rather than SQL NULL.
    sql = "SELECT type,count(*) as freq FROM dictionary WHERE type != 'NULL' and type != 'networkStatus' group by type ;"
    try:
        # The cursor is closed when the with-block exits.
        with db.cursor() as cursor:
            cursor.execute(sql)
            for type_name, freq in cursor.fetchall():
                type_counts_dict[type_name] = freq
    except pymysql.MySQLError as exc:
        # Best effort: report the database error instead of hiding every
        # exception behind a bare ``except``.
        print("Error: unable to fetch data", exc)
    finally:
        # Always release the connection, even if the query failed.
        db.close()
    return type_counts_dict
def fill_null_type(type_counts_dict, type_list):
    """Give every expected type that is missing from the counts a value of 0.

    The previous implementation compared the *number* of keys with the number
    of expected types (and, in one branch, an int with the list itself), so
    the "equal" case was reported as an error and missing types were not
    filled whenever the data also contained unexpected types.  The comparison
    is now done on the actual names.

    Args:
        type_counts_dict: ``{type: count}`` mapping; it is updated in place.
        type_list: iterable of the type names the business expects.

    Returns:
        dict: the same ``type_counts_dict`` object, now containing every name
        from ``type_list``.  Keys that are not in ``type_list`` are kept
        untouched.
    """
    expected = set(type_list)
    null_type = [name for name in type_list if name not in type_counts_dict]
    # Checked before filling so only names that came from the data count.
    has_unexpected = any(name not in expected for name in type_counts_dict)

    if null_type:
        print(null_type)
        for name in null_type:
            type_counts_dict[name] = 0

    if has_unexpected:
        print("Error: Data type is larger than business type!!!")
    elif not null_type:
        print("Info: Data type is equals business type!!!")
    return type_counts_dict
def data_visualization(type_counts_dict):
    """Show a horizontal bar chart of the counts, largest bar at the bottom.

    Args:
        type_counts_dict: ``{type: count}`` mapping to plot.

    Returns:
        None.  Opens a matplotlib window via ``plt.show()``; with an empty
        mapping it only prints a message, because there is nothing to plot
        and ``max()`` of an empty list would raise ``ValueError``.
    """
    if not type_counts_dict:
        print("Error: no data to visualize")
        return

    import matplotlib
    import matplotlib.pyplot as plt

    # Presumably chosen so CJK labels can render; with such fonts the Unicode
    # minus glyph is often missing, hence unicode_minus is switched off.
    matplotlib.rcParams['font.sans-serif'] = ['SimHei']
    matplotlib.rcParams['axes.unicode_minus'] = False

    # Sort by (count, name) descending, matching the previous ordering of
    # (value, key) tuples.
    ranked = sorted(
        type_counts_dict.items(),
        key=lambda item: (item[1], item[0]),
        reverse=True,
    )
    type_name = [name for name, _ in ranked]
    datas = [count for _, count in ranked]
    positions = range(len(datas))

    # barh(y, width): the first (largest) entry is drawn at y=0.
    plt.barh(positions, datas, height=0.5, color='steelblue', alpha=0.8)
    plt.yticks(positions, type_name)
    # Leave room on the right for the value labels.
    plt.xlim(0, max(datas) + 1000)
    plt.xlabel("Data Proportion")
    plt.title("Different types of data volume")
    # Write each count just past the end of its bar.
    for position, count in enumerate(datas):
        plt.text(count + 1/2, position - 0.1, '%s' % count)
    plt.show()
# Fetch the per-type counts, add a zero entry for every expected type that is
# missing from the data, then show the result.
type_counts_dict = fill_null_type(get_type_counts(), type_list)
data_visualization(type_counts_dict)
横条形图
#!/usr/bin/python3
import pymysql
import json
# Fetch the data
def _extract_open_door_type(msg):
    """Return ``data.openDoorType`` from one raw ``msg`` column value."""
    text = msg.decode("utf-8") if isinstance(msg, (bytes, bytearray)) else msg
    text = text.strip()
    # The old code stripped backslash/'n' characters from the row's repr,
    # which covered both real newlines and a literal backslash-n sequence.
    # Keep tolerating the literal form at either end.
    while text.startswith("\\n"):
        text = text[2:].lstrip()
    while text.endswith("\\n"):
        text = text[:-2].rstrip()
    return json.loads(text)["data"]["openDoorType"]


def get_type_counts():
    """Count the ``openDoor`` messages per ``openDoorType`` value.

    Reads the ``msg`` column of every ``dictionary`` row whose type is
    ``'openDoor'``, parses it as JSON and tallies ``data.openDoorType``.

    The payload is now taken directly from the row instead of slicing
    ``str(row)``: the repr of a tuple escapes quotes and backslashes, which
    corrupts any JSON containing them.  A row that cannot be parsed is skipped
    and counted, rather than aborting the whole loop.

    Returns:
        dict: ``{openDoorType: occurrences}``.  If the query fails, the error
        is printed and whatever was collected so far is returned.

    Raises:
        pymysql.MySQLError: if the connection itself cannot be opened.
    """
    config = {
        "mysql_config": {
            "host": "****",
            "user": "***",
            "password": "***.***",
            "database": "****"
        }
    }
    mysql_config = config["mysql_config"]
    open_Doortype_counts_dict = {}
    skipped = 0

    # Keyword arguments are required by PyMySQL 1.0+.
    db = pymysql.connect(
        host=mysql_config["host"],
        user=mysql_config["user"],
        password=mysql_config["password"],
        database=mysql_config["database"],
        charset='utf8',
    )
    sql = "SELECT msg FROM dictionary WHERE type = 'openDoor';"
    try:
        # The cursor is closed when the with-block exits.
        with db.cursor() as cursor:
            cursor.execute(sql)
            for row in cursor.fetchall():
                try:
                    open_Doortype = _extract_open_door_type(row[0])
                    open_Doortype_counts_dict[open_Doortype] = (
                        open_Doortype_counts_dict.get(open_Doortype, 0) + 1
                    )
                except (AttributeError, KeyError, TypeError, ValueError):
                    # Missing payload, invalid JSON, or no data/openDoorType.
                    skipped += 1
    except pymysql.MySQLError as exc:
        print("Error: unable to fetch data", exc)
    finally:
        # Always release the connection, even if the query failed.
        db.close()

    if skipped:
        print("Warning: skipped %d malformed openDoor messages" % skipped)
    return open_Doortype_counts_dict
# Fetch the open-door type counts from the database.
open_Doortype_counts_dict = get_type_counts()
#print(open_Doortype_counts_dict)
# Example output of the print above: {'3': 2191, '1': 1275}
# Fill in the missing types
def fill_null_type(open_Doortype_counts_dict):
    """Give every open-door type code missing from the counts a value of 0.

    The expected codes are ``"0"`` to ``"4"``.  The previous implementation
    compared an int with the list itself, so the "equal" branch could never
    run, and two branches returned the unrelated name ``type_counts_dict``
    instead of the argument.  Both are fixed, and the comparison is now done
    on the actual codes rather than on how many there are.

    Args:
        open_Doortype_counts_dict: ``{code: count}`` mapping; it is updated
            in place.

    Returns:
        dict: the same mapping object, now containing all five expected
        codes.  Codes outside ``"0"``-``"4"`` are kept untouched.
    """
    type_list = ["0", "1", "2", "3", "4"]
    null_type = [code for code in type_list if code not in open_Doortype_counts_dict]
    # Checked before filling so only codes that came from the data count.
    has_unexpected = any(code not in type_list for code in open_Doortype_counts_dict)

    if null_type:
        print(null_type)
        for code in null_type:
            open_Doortype_counts_dict[code] = 0

    if has_unexpected:
        print("Error: Data type is larger than business type!!!")
    elif not null_type:
        print("Info: Data type is equals business type!!!")
    return open_Doortype_counts_dict
# Add a zero count for every open-door type that is absent from the data.
open_Doortype_counts_dict = fill_null_type(open_Doortype_counts_dict)
# Data visualization
def data_visualization(open_Doortype_counts_dict):
    """Show a pie chart of how often each door-opening method was used.

    Args:
        open_Doortype_counts_dict: ``{code: count}`` mapping keyed by the
            open-door type code (``"0"``-``"4"``).  Codes without a display
            name are ignored.

    Returns:
        None.  Opens a matplotlib window via ``plt.show()``; when the known
        codes add up to zero it only prints a message, since a pie chart
        cannot be normalised in that case.
    """
    open_Doortype_name_dict = {'0': "Bluetooth opening", '1': "Open the door remotely", '2': "Password open", '3': "Fingerprint opening", '4': "Dynamic cipher"}

    # Translate the codes into display names, e.g. "0" -> "Bluetooth opening".
    type_name = []
    datas = []
    for code, count in open_Doortype_counts_dict.items():
        if code in open_Doortype_name_dict:
            type_name.append(open_Doortype_name_dict[code])
            datas.append(count)

    if sum(datas) <= 0:
        print("Error: no data to visualize")
        return

    import matplotlib.pyplot as plt

    fig, ax = plt.subplots(figsize=(9, 20), subplot_kw=dict(aspect="equal"))

    def func(pct, allvals):
        # Round rather than truncate: the percentage is a float, so the
        # reconstructed count can land just below the true integer.
        absolute = int(round(pct / 100. * sum(allvals)))
        return "{:.1f}%\n({:d} )".format(pct, absolute)

    wedges, _, autotexts = ax.pie(datas, autopct=lambda pct: func(pct, datas),
                                  textprops=dict(color="w"))
    # Legend placed to the right of the pie.  The title used to be
    # "Ingredients", a leftover that did not describe this chart.
    ax.legend(wedges, type_name,
              title="Open door type",
              loc="center left",
              bbox_to_anchor=(1, 0, 0.5, 0.5))
    # Style of the percentage/count labels drawn on the wedges.
    plt.setp(autotexts, size=20, weight="bold")
    ax.set_title("Open Door Type Proportion", size=20)
    plt.show()
# Show the pie chart for the zero-filled open-door type counts.
data_visualization(open_Doortype_counts_dict)
就先这样吧。