使用matplotlib进行简易的数据分析

现要分析一个地区的降雪数据,数据整理完毕,如下所示:

snow_area代表地区的降雪面积
total_output代表地区的降雪总量
per_hectare代表地区每公顷的降雪量

# Snow-covered area for each region (same order as `area`).
snow_area = [
    83.1, 350.7, 5903.3, 2716.2, 4446.5, 2975, 4615.1, 8543,
    137.1, 5073, 980.8, 5603.9, 829.8, 3367.1, 7166.3, 9521.1,
    3931.4, 4179.5, 2077.3, 2594.3, 289.3, 1260.9, 4649.5, 1816.5,
    3255, 176.2, 2548.7, 1966.9, 159.4, 579.9, 2297.1,
]

# Total snowfall for each region.
total_output = [
    52.3, 194.2, 3321.9, 1232.9, 2492.3, 2017.1, 3601.6, 5435.2,
    98.6, 3360.6, 656, 3252.5, 496.1, 2029.9, 4505.2, 5777,
    2428.5, 2805.2, 1170.8, 1420.6, 149.2, 806.2, 2846.6, 855.5,
    1567.7, 99.8, 1114.2, 883.1, 61.1, 331.8, 1474.8,
]

# Snowfall per hectare for each region.
per_hectare = [
    6296, 5538, 5627, 4539, 5605, 6780, 7804, 6362,
    7190, 6624, 6689, 5804, 5978, 6028, 6287, 6068,
    6177, 6262, 5636, 5476, 5157, 6394, 6122, 4710,
    4816, 5663, 4371, 4490, 3832, 5721, 6420,
]

# Region names. Plain ASCII quotes are required here: typographic quotes
# (‘ ’) are not valid string delimiters in Python.
area = [
    '北京', '天津', '河北', '山西', '内蒙古', '辽宁', '吉林', '黑龙江',
    '上海', '江苏', '浙江', '安徽', '福建', '江西', '山东', '河南',
    '湖北', '湖南', '广东', '广西', '海南', '重庆', '四川', '贵州',
    '云南', '西藏', '陕西', '甘肃', '青海', '宁夏', '新疆',
]


项目需求:

1.读取表格中数据,进行相关数据分析。分析内容包括:求数据最大值,均值,中位数,标准差。

2.对数据进行可视化处理,分别绘制出各项指标的柱形图,饼图和气泡图。


前期数据准备:

import matplotlib as mpl
import matplotlib.pyplot as plt
# Render sans-serif text with the SimHei font so that the Chinese labels used
# in the charts can be displayed (the font must be installed on the system).
plt.rcParams.update({'font.sans-serif': ['SimHei']})

# Snow-covered area for each province (same order as `area`).
snow_area = [
    83.1, 350.7, 5903.3, 2716.2, 4446.5, 2975, 4615.1, 8543,
    137.1, 5073, 980.8, 5603.9, 829.8, 3367.1, 7166.3, 9521.1,
    3931.4, 4179.5, 2077.3, 2594.3, 289.3, 1260.9, 4649.5, 1816.5,
    3255, 176.2, 2548.7, 1966.9, 159.4, 579.9, 2297.1,
]
# Total snowfall for each province.
total_output = [
    52.3, 194.2, 3321.9, 1232.9, 2492.3, 2017.1, 3601.6, 5435.2,
    98.6, 3360.6, 656, 3252.5, 496.1, 2029.9, 4505.2, 5777,
    2428.5, 2805.2, 1170.8, 1420.6, 149.2, 806.2, 2846.6, 855.5,
    1567.7, 99.8, 1114.2, 883.1, 61.1, 331.8, 1474.8,
]
# Snowfall per hectare for each province.
per_hectare = [
    6296, 5538, 5627, 4539, 5605, 6780, 7804, 6362,
    7190, 6624, 6689, 5804, 5978, 6028, 6287, 6068,
    6177, 6262, 5636, 5476, 5157, 6394, 6122, 4710,
    4816, 5663, 4371, 4490, 3832, 5721, 6420,
]
# Province names, used as category labels in the charts.
area = [
    '北京', '天津', '河北', '山西', '内蒙古', '辽宁', '吉林', '黑龙江',
    '上海', '江苏', '浙江', '安徽', '福建', '江西', '山东', '河南',
    '湖北', '湖南', '广东', '广西', '海南', '重庆', '四川', '贵州',
    '云南', '西藏', '陕西', '甘肃', '青海', '宁夏', '新疆',
]

需求1、求最大值、均值、中位数、标准差

求最大值:

# Maximum of each series, found with the built-in max().
snow_area_max, total_output_max, per_hectare_max = (
    max(series) for series in (snow_area, total_output, per_hectare)
)
print('最大值:')
# One value per line, in the order: area, total, per hectare.
print(snow_area_max, total_output_max, per_hectare_max, sep='\n')

求均值:

# Arithmetic mean of each series: the total divided by the number of entries.
snow_area_mean, total_output_mean, per_hectare_mean = (
    sum(series) / len(series)
    for series in (snow_area, total_output, per_hectare)
)
print('均值:')
# One value per line, in the order: area, total, per hectare.
print(snow_area_mean, total_output_mean, per_hectare_mean, sep='\n')

求中位数:

# Median
def median(List):
    """Return the median of a non-empty collection of numbers.

    The input is left unmodified; the values are sorted into a new list.
    With an odd number of values the middle value is returned as-is; with an
    even number the mean of the two middle values is returned.

    Raises:
        ValueError: if List contains no values.
    """
    ordered = sorted(List)
    if not ordered:
        raise ValueError('median() requires at least one value')
    middle = len(ordered) // 2
    if len(ordered) % 2 == 1:
        return ordered[middle]
    # Even count: average the two values on either side of the centre.
    return (ordered[middle - 1] + ordered[middle]) / 2
# Median of each series, computed with median().
snow_area_median, total_output_median, per_hectare_median = map(
    median, (snow_area, total_output, per_hectare)
)
print('中位数:')
# One value per line, in the order: area, total, per hectare.
print(snow_area_median, total_output_median, per_hectare_median, sep='\n')

求标准差:

# Standard deviation
import math
def stdev(List):
    """Return the population standard deviation of the values in List.

    This is the square root of the mean squared deviation from the mean,
    dividing by the number of values (not by n - 1).
    """
    count = len(List)
    mean = sum(List) / count
    squared_total = 0
    # Accumulate the squared deviations one by one, in input order.
    for value in List:
        deviation = value - mean
        squared_total += deviation ** 2
    return math.sqrt(squared_total / count)
# Standard deviation of each series, computed with stdev().
snow_area_stdev, total_output_stdev, per_hectare_stdev = map(
    stdev, (snow_area, total_output, per_hectare)
)
print('标准差:')
# One value per line, in the order: area, total, per hectare.
print(snow_area_stdev, total_output_stdev, per_hectare_stdev, sep='\n')

需求1完整代码:

import matplotlib as mpl
import matplotlib.pyplot as plt
# Render sans-serif text with the SimHei font so that the Chinese labels used
# in the charts can be displayed (the font must be installed on the system).
plt.rcParams.update({'font.sans-serif': ['SimHei']})

# Snow-covered area for each province (same order as `area`).
snow_area = [
    83.1, 350.7, 5903.3, 2716.2, 4446.5, 2975, 4615.1, 8543,
    137.1, 5073, 980.8, 5603.9, 829.8, 3367.1, 7166.3, 9521.1,
    3931.4, 4179.5, 2077.3, 2594.3, 289.3, 1260.9, 4649.5, 1816.5,
    3255, 176.2, 2548.7, 1966.9, 159.4, 579.9, 2297.1,
]
# Total snowfall for each province.
total_output = [
    52.3, 194.2, 3321.9, 1232.9, 2492.3, 2017.1, 3601.6, 5435.2,
    98.6, 3360.6, 656, 3252.5, 496.1, 2029.9, 4505.2, 5777,
    2428.5, 2805.2, 1170.8, 1420.6, 149.2, 806.2, 2846.6, 855.5,
    1567.7, 99.8, 1114.2, 883.1, 61.1, 331.8, 1474.8,
]
# Snowfall per hectare for each province.
per_hectare = [
    6296, 5538, 5627, 4539, 5605, 6780, 7804, 6362,
    7190, 6624, 6689, 5804, 5978, 6028, 6287, 6068,
    6177, 6262, 5636, 5476, 5157, 6394, 6122, 4710,
    4816, 5663, 4371, 4490, 3832, 5721, 6420,
]
# Province names, used as category labels in the charts.
area = [
    '北京', '天津', '河北', '山西', '内蒙古', '辽宁', '吉林', '黑龙江',
    '上海', '江苏', '浙江', '安徽', '福建', '江西', '山东', '河南',
    '湖北', '湖南', '广东', '广西', '海南', '重庆', '四川', '贵州',
    '云南', '西藏', '陕西', '甘肃', '青海', '宁夏', '新疆',
]
# Summary statistics: maximum, mean, median, standard deviation.
# Maximum of each series, found with the built-in max().
snow_area_max, total_output_max, per_hectare_max = (
    max(series) for series in (snow_area, total_output, per_hectare)
)
print('最大值:')
print(snow_area_max, total_output_max, per_hectare_max, sep='\n')
# Arithmetic mean of each series: the total divided by the number of entries.
snow_area_mean, total_output_mean, per_hectare_mean = (
    sum(series) / len(series)
    for series in (snow_area, total_output, per_hectare)
)
print('均值:')
print(snow_area_mean, total_output_mean, per_hectare_mean, sep='\n')
# Median
def median(List):
    """Return the median of a non-empty collection of numbers.

    The input is left unmodified; the values are sorted into a new list.
    With an odd number of values the middle value is returned as-is; with an
    even number the mean of the two middle values is returned.

    Raises:
        ValueError: if List contains no values.
    """
    ordered = sorted(List)
    if not ordered:
        raise ValueError('median() requires at least one value')
    middle = len(ordered) // 2
    if len(ordered) % 2 == 1:
        return ordered[middle]
    # Even count: average the two values on either side of the centre.
    return (ordered[middle - 1] + ordered[middle]) / 2
# Median of each series, computed with median().
snow_area_median, total_output_median, per_hectare_median = map(
    median, (snow_area, total_output, per_hectare)
)
print('中位数:')
# One value per line, in the order: area, total, per hectare.
print(snow_area_median, total_output_median, per_hectare_median, sep='\n')
# Standard deviation
import math
def stdev(List):
    """Return the population standard deviation of the values in List.

    This is the square root of the mean squared deviation from the mean,
    dividing by the number of values (not by n - 1).
    """
    count = len(List)
    mean = sum(List) / count
    squared_total = 0
    # Accumulate the squared deviations one by one, in input order.
    for value in List:
        deviation = value - mean
        squared_total += deviation ** 2
    return math.sqrt(squared_total / count)
# Standard deviation of each series, computed with stdev().
snow_area_stdev, total_output_stdev, per_hectare_stdev = map(
    stdev, (snow_area, total_output, per_hectare)
)
print('标准差:')
# One value per line, in the order: area, total, per hectare.
print(snow_area_stdev, total_output_stdev, per_hectare_stdev, sep='\n')

输出结果:

使用matplotlib进行简易的数据分析_第1张图片


需求2、绘制柱状图、饼图、气泡图

柱状图的绘制:

# Bar chart of the snow area of each province.
# There are too many x-axis labels for the default canvas, so use a wide
# figure; figsize is (width, height).
plt.figure(figsize=(20, 10))
# 1-based x positions, one per province; shared by the bars and their ticks.
bar_positions = range(1, len(snow_area) + 1)
plt.bar(bar_positions, snow_area)
plt.xticks(bar_positions, area)
plt.xlabel('省份')
# The plotted series is snow area, so the axis label and the title use
# 降雪面积 (snow area), matching the pie and bubble charts.
plt.ylabel('降雪面积')
plt.title('各省降雪面积')
plt.show()

输出结果:使用matplotlib进行简易的数据分析_第2张图片
饼图的绘制:

# Pie chart: each province's share of the total snow area.
# NOTE (from the original comment): the canvas is cleared after every
# plt.show() call, so this chart is set up on a new figure.
plt.figure(figsize=(30, 30))
plt.pie(
    snow_area,
    labels=area,
    autopct="%1.1f%%",  # print each wedge's percentage with one decimal
)
plt.title('各省降雪面积占比图')
plt.show()

输出结果:
使用matplotlib进行简易的数据分析_第3张图片
气泡图的绘制:

# Bubble chart relating snow area (x), total snowfall (y) and snowfall per
# hectare (bubble size).
# The raw per-hectare values are too large to read clearly as marker sizes,
# so they are divided by 30. The scaled values go into their own list so that
# per_hectare keeps the source data and re-running this section does not
# shrink the bubbles by another factor of 30.
bubble_sizes = [value / 30 for value in per_hectare]
plt.scatter(snow_area, total_output, s=bubble_sizes)
plt.xlabel('降雪面积')
plt.ylabel('降雪总量')
plt.title('降雪面积/总量/单位降雪量气泡图')
plt.show()

输出结果:
使用matplotlib进行简易的数据分析_第4张图片
完整代码如下:

import matplotlib as mpl
import matplotlib.pyplot as plt
# Render sans-serif text with the SimHei font so that the Chinese labels used
# in the charts can be displayed (the font must be installed on the system).
plt.rcParams.update({'font.sans-serif': ['SimHei']})

# Snow-covered area for each province (same order as `area`).
snow_area = [
    83.1, 350.7, 5903.3, 2716.2, 4446.5, 2975, 4615.1, 8543,
    137.1, 5073, 980.8, 5603.9, 829.8, 3367.1, 7166.3, 9521.1,
    3931.4, 4179.5, 2077.3, 2594.3, 289.3, 1260.9, 4649.5, 1816.5,
    3255, 176.2, 2548.7, 1966.9, 159.4, 579.9, 2297.1,
]
# Total snowfall for each province.
total_output = [
    52.3, 194.2, 3321.9, 1232.9, 2492.3, 2017.1, 3601.6, 5435.2,
    98.6, 3360.6, 656, 3252.5, 496.1, 2029.9, 4505.2, 5777,
    2428.5, 2805.2, 1170.8, 1420.6, 149.2, 806.2, 2846.6, 855.5,
    1567.7, 99.8, 1114.2, 883.1, 61.1, 331.8, 1474.8,
]
# Snowfall per hectare for each province.
per_hectare = [
    6296, 5538, 5627, 4539, 5605, 6780, 7804, 6362,
    7190, 6624, 6689, 5804, 5978, 6028, 6287, 6068,
    6177, 6262, 5636, 5476, 5157, 6394, 6122, 4710,
    4816, 5663, 4371, 4490, 3832, 5721, 6420,
]
# Province names, used as category labels in the charts.
area = [
    '北京', '天津', '河北', '山西', '内蒙古', '辽宁', '吉林', '黑龙江',
    '上海', '江苏', '浙江', '安徽', '福建', '江西', '山东', '河南',
    '湖北', '湖南', '广东', '广西', '海南', '重庆', '四川', '贵州',
    '云南', '西藏', '陕西', '甘肃', '青海', '宁夏', '新疆',
]
# Summary statistics: maximum, mean, median, standard deviation.
# Maximum of each series, found with the built-in max().
snow_area_max, total_output_max, per_hectare_max = (
    max(series) for series in (snow_area, total_output, per_hectare)
)
print('最大值:')
print(snow_area_max, total_output_max, per_hectare_max, sep='\n')
# Arithmetic mean of each series: the total divided by the number of entries.
snow_area_mean, total_output_mean, per_hectare_mean = (
    sum(series) / len(series)
    for series in (snow_area, total_output, per_hectare)
)
print('均值:')
print(snow_area_mean, total_output_mean, per_hectare_mean, sep='\n')
# Median
def median(List):
    """Return the median of a non-empty collection of numbers.

    The input is left unmodified; the values are sorted into a new list.
    With an odd number of values the middle value is returned as-is; with an
    even number the mean of the two middle values is returned.

    Raises:
        ValueError: if List contains no values.
    """
    ordered = sorted(List)
    if not ordered:
        raise ValueError('median() requires at least one value')
    middle = len(ordered) // 2
    if len(ordered) % 2 == 1:
        return ordered[middle]
    # Even count: average the two values on either side of the centre.
    return (ordered[middle - 1] + ordered[middle]) / 2
# Median of each series, computed with median().
snow_area_median, total_output_median, per_hectare_median = map(
    median, (snow_area, total_output, per_hectare)
)
print('中位数:')
# One value per line, in the order: area, total, per hectare.
print(snow_area_median, total_output_median, per_hectare_median, sep='\n')
# Standard deviation
import math
def stdev(List):
    """Return the population standard deviation of the values in List.

    This is the square root of the mean squared deviation from the mean,
    dividing by the number of values (not by n - 1).
    """
    count = len(List)
    mean = sum(List) / count
    squared_total = 0
    # Accumulate the squared deviations one by one, in input order.
    for value in List:
        deviation = value - mean
        squared_total += deviation ** 2
    return math.sqrt(squared_total / count)
# Standard deviation of each series, computed with stdev().
snow_area_stdev, total_output_stdev, per_hectare_stdev = map(
    stdev, (snow_area, total_output, per_hectare)
)
print('标准差:')
# One value per line, in the order: area, total, per hectare.
print(snow_area_stdev, total_output_stdev, per_hectare_stdev, sep='\n')
# Bar chart of the snow area of each province.
# There are too many x-axis labels for the default canvas, so use a wide
# figure; figsize is (width, height).
plt.figure(figsize=(20, 10))
# 1-based x positions, one per province; shared by the bars and their ticks.
bar_positions = range(1, len(snow_area) + 1)
plt.bar(bar_positions, snow_area)
plt.xticks(bar_positions, area)
plt.xlabel('省份')
# The plotted series is snow area, so the axis label and the title use
# 降雪面积 (snow area), matching the pie and bubble charts.
plt.ylabel('降雪面积')
plt.title('各省降雪面积')
plt.show()
# Pie chart: each province's share of the total snow area.
# NOTE (from the original comment): the canvas is cleared after every
# plt.show() call, so this chart is set up on a new figure.
plt.figure(figsize=(30, 30))
plt.pie(
    snow_area,
    labels=area,
    autopct="%1.1f%%",  # print each wedge's percentage with one decimal
)
plt.title('各省降雪面积占比图')
plt.show()
# Bubble chart relating snow area (x), total snowfall (y) and snowfall per
# hectare (bubble size).
# The raw per-hectare values are too large to read clearly as marker sizes,
# so they are divided by 30. The scaled values go into their own list so that
# per_hectare keeps the source data and re-running this section does not
# shrink the bubbles by another factor of 30.
bubble_sizes = [value / 30 for value in per_hectare]
plt.scatter(snow_area, total_output, s=bubble_sizes)
plt.xlabel('降雪面积')
plt.ylabel('降雪总量')
plt.title('降雪面积/总量/单位降雪量气泡图')
plt.show()

输出结果在上文中均已展示,可参照上文查看各部分代码对应的输出结果。


欢迎大家查看作者的主页,主页中还有关于编程与算法方面的更多内容,欢迎大家相互沟通学习~

你可能感兴趣的:(matplotlib,数据分析,python)