目录
Matplotlib绘图
折线图绘制
.plot()
子图操作
.add_subplot(x,y,z)
条形图
plt.subplots()
散点图
.scatter()
直方图
.hist()
盒图
.boxplot()
这里用到的数据集(如 unrate.csv)原文给出的下载地址为 https://github.com/account_verifications?resent=1 ,但该链接指向的是 GitHub 账号验证页面而非数据集,链接已失效,请另行获取数据文件。
# Unemployment-rate data (one row per month, as the printed rows show)
import pandas as pd
unrate = pd.read_csv("unrate.csv")
# pd.to_datetime(col) parses the column's date strings into pandas datetime values
unrate['DATE'] = pd.to_datetime(unrate['DATE'])
print(unrate.head(12))
         DATE  VALUE
0  1948-01-01    3.4
1  1948-02-01    3.8
2  1948-03-01    4.0
3  1948-04-01    3.9
4  1948-05-01    3.5
5  1948-06-01    3.6
6  1948-07-01    3.6
7  1948-08-01    3.9
8  1948-09-01    3.8
9  1948-10-01    3.7
10 1948-11-01    3.8
11 1948-12-01    4.0
# pyplot has to be imported before its first use; without this line the
# snippet raises NameError when the article is run from top to bottom.
import matplotlib.pyplot as plt

# Line chart of the first 12 rows of the table (the 1948 rows printed above).
first_twelve = unrate[0:12]
# plt.plot(x, y): first argument is the x-axis data, second the y-axis data
plt.plot(first_twelve['DATE'], first_twelve['VALUE'])
plt.xticks(rotation=45)  # rotate the x tick labels by 45 degrees
plt.xlabel("Month")  # x-axis label
plt.ylabel("Unemployment Rate")  # y-axis label
plt.title("Monthly Unemployment Trends, 1948")  # figure title
plt.show()
import matplotlib.pyplot as plt

# Start from an empty figure (the default drawing canvas).
fig = plt.figure()
# fig.add_subplot(rows, cols, index) splits the figure into a rows x cols grid
# and returns the axes at the given 1-based position.
# Fill cells 1, 2 and 4 of a 2x2 grid; cell 3 is left empty.
ax1, ax2, ax3 = [fig.add_subplot(2, 2, cell) for cell in (1, 2, 4)]
plt.show()
import numpy as np

# figsize is (width, height) in inches: 3 wide by 4 tall.
fig = plt.figure(figsize=(3, 4))
# Two panels stacked vertically (2 rows, 1 column).
ax1, ax2 = fig.add_subplot(2, 1, 1), fig.add_subplot(2, 1, 2)
# Top panel: five random integers drawn from [1, 5) on x against 0..4 on y.
ax1.plot(np.random.randint(1, 5, 5), np.arange(5))
# Bottom panel: x = 0, 3, ..., 27 against y = 0..9.
ax2.plot(3 * np.arange(10), np.arange(10))
plt.show()
# Overlay the first five 12-row slices of the table (labelled 1948-1952)
# on a single chart, one colour per year.
fig = plt.figure(figsize=(10, 6))
colors = ['red', 'blue', 'green', 'orange', 'black']
for i, color in enumerate(colors):
    start_index = i * 12
    end_index = (i + 1) * 12
    subset = unrate[start_index:end_index]
    label = str(1948 + i)
    # The table only has DATE and VALUE columns, so take the month number
    # (1-12) from DATE; this lines every year up on a shared x-axis.
    plt.plot(subset['DATE'].dt.month, subset['VALUE'], c=color, label=label)
# Add the legend. loc='best' lets matplotlib pick the position automatically;
# a value such as 'upper right' pins it to that corner instead.
plt.legend(loc='best')
plt.xlabel("Month")  # x-axis label
plt.ylabel("Unemployment Rate")  # y-axis label
plt.title("Monthly Unemployment Trends")  # figure title
# Run help(plt.legend) to see every legend option.
plt.show()
import matplotlib.pyplot as plt
from numpy import arange
# NOTE(review): norm_reviews is never defined in this article; presumably it is a
# DataFrame of movie ratings loaded in a cell that was not included — confirm.
num_cols = ['RT_user_norm','Metacritic_user_nom','IMDB_norm','Fandango_Ratingvalue','Fandango_Stars']
# Bar heights: the num_cols values taken from row 0
bar_heights = norm_reviews.loc[0, num_cols].values
print(bar_heights)# the rating values
# bar_positions: x position of each bar
bar_positions = arange(5) + 0.75 # distance of each bar from the origin along the x-axis
tick_positions = range(1,6)
print(bar_positions)
fig, ax=plt.subplots()
# Vertical bar chart
ax.bar(bar_positions,bar_heights,0.3)# 0.3: width of each bar
ax.set_xticks(tick_positions)
ax.set_xticklabels(num_cols,rotation=45)
ax.set_xlabel('Rating Source')
ax.set_ylabel('Average Rating')
ax.set_title('Average User Rating For Avengers: Age of Ultron (2015)')
plt.show()
[4.3 3.55 3.9 4.5 5.0]
[0.75 1.75 2.75 3.75 4.75]
# To draw the bars horizontally, replace this line of the code above:
#ax.bar(bar_positions,bar_heights,0.3)# 0.3: width of each bar
# with the following (the tick and label calls move to the other axis as well):
ax.barh(bar_positions,bar_heights,0.5)
ax.set_yticks(tick_positions)
ax.set_yticklabels(num_cols)
ax.set_ylabel('Rating Source')
ax.set_xlabel('Average Rating')
# Scatter plot: Fandango rating on the x-axis against the Rotten Tomatoes
# user rating on the y-axis.
fig, ax = plt.subplots()
# The original call had lost its method name and opening parenthesis
# (a syntax error); restored as ax.scatter(x, y).
ax.scatter(norm_reviews['Fandango_Ratingvalue'], norm_reviews['RT_user_norm'])
ax.set_xlabel("Fandango")
ax.set_ylabel("Rotten Tomatoes")
plt.show()
# Count how often each distinct rating occurs, ordered by rating value.
fandango_distribution = norm_reviews["Fandango_Ratingvalue"].value_counts().sort_index()
imdb_distribution = norm_reviews['IMDB_norm'].value_counts().sort_index()
# print(fandango_distribution)
# print(imdb_distribution)
fig, ax = plt.subplots()
# Without bins, hist() uses 10 bins: ax.hist(norm_reviews['Fandango_Ratingvalue'])
ax.hist(norm_reviews['Fandango_Ratingvalue'], bins=20)
# To restrict the binned interval, pass range as a keyword, e.g. range=(4, 5).
# A positional range(4, 5) lands in the bins slot and clashes with bins=20.
plt.show()
# Four stacked histograms, one per rating source. All share the same bins
# (20 bins over 0-5) and the same y-range so the distributions can be
# compared directly.
fig = plt.figure(figsize=(5, 20))
# (column, title) for each panel, top to bottom.
panels = [
    ('Fandango_Ratingvalue', "Distribution of Fandango Ratings"),
    ('RT_user_norm', "Distribution of RT_user_norm Ratings"),
    ('Metacritic_user_nom', "Distribution of Metacritic_user_nom Ratings"),
    ('IMDB_norm', "Distribution of IMDB_norm Ratings"),
]
# Keep the ax1..ax4 names so code that refers to them still works.
ax1, ax2, ax3, ax4 = [fig.add_subplot(4, 1, row) for row in range(1, 5)]
for panel_ax, (column, title) in zip((ax1, ax2, ax3, ax4), panels):
    panel_ax.hist(norm_reviews[column], bins=20, range=(0, 5))
    panel_ax.set_title(title)
    panel_ax.set_ylim(0, 50)  # fix the y-axis range
plt.show()
# Box plot with one box per rating column.
num_cols = [
    'RT_user_norm',
    'Metacritic_user_nom',
    'IMDB_norm',
    'Fandango_Ratingvalue',
    'Fandango_Stars',
]
fig, ax = plt.subplots()
# .values yields a 2-D array; boxplot draws one box for each of its columns.
rating_matrix = norm_reviews[num_cols].values
ax.boxplot(rating_matrix)
ax.set_xticklabels(num_cols, rotation=45)
ax.set_ylim(0, 5)
plt.show()
以上图均可以参考官方文档完成,不必特意去记住。文档中还有一些比较细节的设置。