【数据分析】绘制统计图

频率分布直方图

# 运行以下代码
# Load the CSV, sort the 'data' column in descending order and plot its frequency histogram
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Load the CSV and print basic summary statistics (mean, standard deviation, etc.).
path7 = './data.csv'  # train.csv
titanic = pd.read_csv(path7)
print(titanic.describe())

# Values of the 'data' column in descending order. The ordering has no effect
# on the histogram below; it is kept so that df is the same object as before.
df = titanic['data'].sort_values(ascending=False)

# Bin edges every 4000 from 0 up to and including 100000. np.arange excludes
# its stop value, so the stop is pushed one step past the last wanted edge;
# with a stop of exactly 100000 the final edge would be 96000 and every value
# above it would be silently left out of the histogram.
binStep = 4000
binsVal = np.arange(0, 100000 + binStep, binStep)

# Draw the histogram using the explicit bin edges.
plt.hist(df, bins=binsVal)

# Axis labels and title.
plt.xlabel('data')
plt.ylabel('Frequency')
plt.title('Fare Paid Histogram')

# Display the figure.
plt.show()

【数据分析】绘制统计图_第1张图片

正态分布直方图

# -*- coding:utf-8 -*-
import numpy as np
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
import os


# Read the first comma-separated field of every line as an integer sample.
x = []
# Opening the file in the with-statement itself guarantees it is closed even
# if the open/parse sequence is interrupted by an exception.
with open('G:/Test/5-25/data.csv', 'r', newline='') as csvFile1:
    for line in csvFile1:
        if not line.strip():
            # A blank line (e.g. a trailing newline at end of file) has no
            # value to parse and would make int() raise ValueError.
            continue
        x.append(int(line.split(',')[0]))

# Sample mean and (population) standard deviation of the data.
mu = np.mean(x)
sigma = np.std(x)
# For a quick experiment without a data file, normally distributed samples can
# be generated instead with: mu + sigma * np.random.randn(10000)

num_bins = 50  # number of histogram bars

# density=True scales the bars so the total area is 1 (a probability density).
# It replaces the `normed` keyword, which newer matplotlib releases no longer
# accept. Returns the bar heights, the bin edges, and the bar patch objects.
n, bins, patches = plt.hist(x, num_bins, density=True, facecolor='green', alpha=0.5)

# Gaussian probability density evaluated at the bin edges, used only to draw
# the fitted curve. Computed directly with numpy because mlab.normpdf is not
# available in newer matplotlib releases.
y = np.exp(-0.5 * ((bins - mu) / sigma) ** 2) / (sigma * np.sqrt(2 * np.pi))

plt.plot(bins, y, 'r--')
plt.xlabel('Smarts')
plt.ylabel('Probability')
# Show the statistics actually computed from the data rather than fixed values.
plt.title(r'Histogram of IQ: $\mu=%.2f$ $\sigma=%.2f$' % (mu, sigma))

plt.subplots_adjust(left=0.15)  # widen the left margin so the y label fits
plt.show()

【数据分析】绘制统计图_第2张图片

饼图

# 运行以下代码
# Count the rows belonging to each class and draw the proportions as a pie chart
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Load the CSV that contains a numeric 'class' column.
path7 = './data.csv'  # train.csv
titanic = pd.read_csv(path7)

# Class values to chart, in slice order.
classIds = (1, 2, 3, 4)

# Number of rows whose 'class' equals each of the values above.
proportions = [(titanic['class'] == classId).sum() for classId in classIds]

# Draw the pie chart.
plt.pie(
    # slice sizes: the per-class row counts
    proportions,

    # one label per slice: 'class1' .. 'class4'
    labels=['class%d' % classId for classId in classIds],

    # no drop shadow
    shadow=False,

    # slice colours, in the same order as the labels
    colors=['blue', 'red', 'green', 'yellow'],

    # offset of each slice from the centre; only the first (blue) one is pulled out
    explode=(0.15, 0, 0, 0),

    # rotate the start of the first slice to 90 degrees
    startangle=90,

    # print each slice's percentage with one decimal place
    autopct='%1.1f%%'
    )

# Equal axis scaling so the pie is drawn as a circle.
plt.axis('equal')

# The chart shows the share of each class, so the title says so.
plt.title("Class Proportion")

# Fit the layout to the figure and display it.
plt.tight_layout()
plt.show()

【数据分析】绘制统计图_第3张图片

你可能感兴趣的:(python_ing)