Matplotlib 是 Python 的数据可视化库。
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
1、字符串转化为日期
# Load the unemployment-rate table and convert its DATE column from strings
# to pandas datetimes in a single expression.
unrate = pd.read_csv("unrate.csv").assign(
    DATE=lambda frame: pd.to_datetime(frame["DATE"])
)
2、折线图
# Line chart of the first 12 rows of `unrate`: DATE on the x axis,
# VALUE on the y axis.
data1 = unrate[0:12]
ax = plt.gca()  # the axes that the pyplot-level calls draw on
ax.plot(data1["DATE"], data1["VALUE"])
ax.set_title("my first plt")
ax.set_xlabel("Date Month")
ax.set_ylabel("Rate Value")
# Rotate the x tick labels so they do not overlap.
plt.xticks(rotation=45)
plt.show()
3、多图拼接
# Two stacked subplots (2 rows, 1 column) created in one call.
fig, (ax1, ax2) = plt.subplots(2, 1)
# Top panel: five random integers in [1, 5) plotted against 0..4.
ax1.plot(np.random.randint(1, 5, 5), np.arange(5))
# Bottom panel: the straight line x = 3 * y for y in 0..9.
steps = np.arange(10)
ax2.plot(steps * 3, steps)
plt.show()
4、一图多线
# Draw two consecutive 12-row windows of `unrate` as separate lines on one
# figure, using the month number as the shared x axis.
fig = plt.figure(figsize=(6, 3))  # figure size in inches (width, height)

# Take explicit copies: adding a column to a plain slice of `unrate` raises
# pandas' SettingWithCopyWarning and may not behave as intended.
data1 = unrate[0:12].copy()
data1["MONTH"] = data1["DATE"].dt.month
plt.plot(data1["MONTH"], data1["VALUE"], c="red")

data2 = unrate[12:24].copy()
data2["MONTH"] = data2["DATE"].dt.month
plt.plot(data2["MONTH"], data2["VALUE"], c="blue")

plt.xticks(rotation=45)  # rotate the x tick labels
plt.xlabel("Date Month")
plt.ylabel("Rate Value")
plt.title("my first plt")
plt.show()
5、一图多线 - 自动跑代码(带图例)
# Plot five consecutive 12-row windows of `unrate` on one figure, one colour
# per window, and add a legend.
fig = plt.figure(figsize=(10, 6))
colors = ['red', 'blue', 'green', 'orange', 'black']
for i, color in enumerate(colors):
    start_index = i * 12
    end_index = (i + 1) * 12
    subset = unrate[start_index:end_index]
    # Legend label is 1948 + i; presumably the data starts in January 1948
    # so that each window is one calendar year — TODO confirm.
    label = str(1948 + i)
    # Derive the month from DATE directly: no visible code adds a MONTH
    # column to `unrate` itself, only to earlier slices of it.
    plt.plot(subset['DATE'].dt.month, subset['VALUE'], c=color, label=label)
# loc='best' would let Matplotlib choose the legend position instead.
plt.legend(loc='upper left')
plt.show()
6、条形图
# Vertical bar chart of the rating columns for the row labelled 0 of
# `fand_new`.
# NOTE(review): assumes `fand_new` is a DataFrame loaded elsewhere whose
# index contains the label 0 — confirm.
fand_col = ["Fandango_Stars", "Fandango_Ratingvalue", "Metacritic_norm", "RT_user_norm_round", "IMDB_norm_round"]
# `.ix` has been removed from pandas; `.loc` is the label-based replacement.
bar_heights = fand_new.loc[0, fand_col].values  # bar heights
# Left edge of each bar: 0.75, 1.75, ... so a 0.5-wide bar is centred on
# the integer tick positions 1, 2, ...
bar_positions = np.arange(len(fand_col)) + 0.75
tick_positions = range(1, len(fand_col) + 1)
fig, ax = plt.subplots()
# 0.5 is the bar width. align='edge' makes bar_positions the left edges;
# Matplotlib's current default ('center') would shift bars off the ticks.
ax.bar(bar_positions, bar_heights, 0.5, align='edge')
ax.set_xticks(tick_positions)
ax.set_xticklabels(fand_col, rotation=90)
ax.set_xlabel('Rating Source')
ax.set_ylabel('Average Rating')
ax.set_title('Average User Rating For Avengers: Age of Ultron (2015)')
plt.show()
7、条形图 - 横向
# Horizontal bar chart of the rating columns for the row labelled 0 of
# `fand_new`.
# NOTE(review): assumes `fand_new` is a DataFrame loaded elsewhere whose
# index contains the label 0 — confirm.
fand_col = ["Fandango_Stars", "Fandango_Ratingvalue", "Metacritic_norm", "RT_user_norm_round", "IMDB_norm_round"]
# `.ix` has been removed from pandas; `.loc` is the label-based replacement.
bar_heights = fand_new.loc[0, fand_col].values  # bar lengths along the x axis
# Lower edge of each bar: 0.75, 1.75, ... so a 0.5-thick bar is centred on
# the integer tick positions 1, 2, ...
bar_positions = np.arange(len(fand_col)) + 0.75
tick_positions = range(1, len(fand_col) + 1)
fig, ax = plt.subplots()
# barh draws horizontal bars; 0.5 is the bar thickness. align='edge' makes
# bar_positions the lower edges so the bars line up with the ticks.
ax.barh(bar_positions, bar_heights, 0.5, align='edge')
ax.set_yticks(tick_positions)
ax.set_yticklabels(fand_col, rotation=0)
# With horizontal bars the categories run along y and the values along x,
# so the axis labels are the other way round from the vertical chart.
ax.set_xlabel('Average Rating')
ax.set_ylabel('Rating Source')
ax.set_title('Average User Rating For Avengers: Age of Ultron (2015)')
plt.show()
8、散点图
# Scatter plot of Fandango star ratings against normalised Metacritic
# ratings, one point per row of `fand_new`.
fig, ax = plt.subplots()
ax.scatter(fand_new['Fandango_Stars'], fand_new['Metacritic_norm'])
ax.set_xlabel('Fandango')
# The y data is the Metacritic_norm column, so label it as Metacritic
# (it was previously labelled 'Rotten Tomatoes').
ax.set_ylabel('Metacritic')
plt.show()
9、直方图
# Frequency of each distinct rating value, ordered by the rating value.
fandango_distribution = fand_new['Fandango_Stars'].value_counts().sort_index()
imdb_distribution = fand_new['IMDB_norm_round'].value_counts().sort_index()

# `bins` is the number of groups the data is split into (default: 10).
# `range` limits the histogram to part of the x axis; check the data's
# x range before choosing it.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.hist(fand_new['Fandango_Stars'], bins=5, range=(4, 5))
plt.show()
10、多图
# 2x2 grid of histograms, one per rating column, all sharing the same
# binning (20 bins over 0..5) and y limit so the panels are comparable.
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(12, 12))
# Each title names the column actually plotted in that panel; the IMDB and
# Rotten Tomatoes titles were previously attached to each other's data.
panels = [
    (ax1, 'Fandango_Stars', 'Distribution of Fandango Ratings'),
    (ax2, 'IMDB_norm_round', 'Distribution of IMDB Ratings'),
    (ax3, 'Metacritic_norm', 'Distribution of Metacritic Ratings'),
    (ax4, 'RT_user_norm_round', 'Distribution of Rotten Tomatoes Ratings'),
]
for panel_ax, column, title in panels:
    panel_ax.hist(fand_new[column], bins=20, range=(0, 5))
    panel_ax.set_title(title)
    panel_ax.set_ylim(0, 50)
plt.show()
11、箱线图(四分位图)
# Box plot of the normalised Metacritic ratings.
fig, ax = plt.subplots()
ax.boxplot(fand_new['Metacritic_norm'])
# The box shows the Metacritic_norm column, so label it as Metacritic
# (it was previously labelled 'Rotten Tomatoes').
ax.set_xticklabels(['Metacritic'])
ax.set_ylim(0, 5)
plt.show()
12、多图 - 通过数组
# One box per rating column, drawn side by side on a single axes.
num_cols = ['Fandango_Stars', 'IMDB_norm_round', 'Metacritic_norm', 'RT_user_norm_round']
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
# Passing the 2-D value array makes boxplot draw one box per column.
ratings_matrix = fand_new[num_cols].values
ax.boxplot(ratings_matrix)
ax.set_ylim(0, 5)
ax.set_xticklabels(num_cols, rotation=90)
plt.show()
13、数据可视化 - 简洁一些
# Plot the Biology column of `women_degrees` and its complement to 100 over
# the years, with tick marks and the axes frame hidden for a cleaner look.
# NOTE(review): assumes `women_degrees` is a DataFrame loaded elsewhere
# with 'Year' and 'Biology' columns holding percentages — confirm.
fig, ax = plt.subplots()
ax.plot(women_degrees['Year'], women_degrees['Biology'], c='blue', label='Women')
ax.plot(women_degrees['Year'], 100 - women_degrees['Biology'], c='green', label='Men')
# Use real booleans: newer Matplotlib releases no longer accept the strings
# "on"/"off" here, and a non-empty string is truthy.
ax.tick_params(bottom=False, top=False, left=False, right=False)
# Hide all four border lines (spines) of the axes.
for spine in ax.spines.values():
    spine.set_visible(False)
ax.legend(loc='upper right')
plt.show()
14、数据可视化 - 多图 - 通过程序
# 2x2 grid: one subplot per degree category, each showing the category's
# column of `women_degrees` and its complement to 100 over the years.
# NOTE(review): assumes `women_degrees` is a DataFrame loaded elsewhere
# with a 'Year' column and one percentage column per category — confirm.
major_cats = ['Biology', 'Computer Science', 'Engineering', 'Math and Statistics']
fig = plt.figure(figsize=(12, 12))
for sp in range(0, 4):
    ax = fig.add_subplot(2, 2, sp + 1)
    ax.plot(women_degrees['Year'], women_degrees[major_cats[sp]], c='blue', label='Women')
    ax.plot(women_degrees['Year'], 100 - women_degrees[major_cats[sp]], c='green', label='Men')
# pyplot.legend() acts on the current axes, i.e. the last subplot created,
# so only that panel gets a legend.
plt.legend(loc='upper right')
plt.show()
15、数据可视化 - 多图 - 通过程序跑 - 多图 简洁
# 2x2 grid of category subplots with a decluttered style: no spines, no
# tick marks, and identical axis limits and a title on every panel.
# NOTE(review): assumes `women_degrees` is a DataFrame loaded elsewhere
# with a 'Year' column and one percentage column per category — confirm.
major_cats = ['Biology', 'Computer Science', 'Engineering', 'Math and Statistics']
fig = plt.figure(figsize=(12, 12))
for sp in range(0, 4):
    ax = fig.add_subplot(2, 2, sp + 1)
    ax.plot(women_degrees['Year'], women_degrees[major_cats[sp]], c='blue', label='Women')
    ax.plot(women_degrees['Year'], 100 - women_degrees[major_cats[sp]], c='green', label='Men')
    # Hide all four border lines (spines) of this subplot.
    for spine in ax.spines.values():
        spine.set_visible(False)
    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)
    ax.set_title(major_cats[sp])
    # Use real booleans: newer Matplotlib releases no longer accept the
    # strings "on"/"off" here, and a non-empty string is truthy.
    ax.tick_params(bottom=False, top=False, left=False, right=False)
# pyplot.legend() acts on the current axes, i.e. the last subplot created,
# so only that panel gets a legend.
plt.legend(loc='upper right')
plt.show()
16、如何使图表更好看?
# Same 2x2 grid of category subplots, drawn with custom line colours.
# Relies on `major_cats` and `women_degrees` being defined earlier.
# Custom colours as RGB tuples, each channel scaled from 0-255 to 0-1.
cb_dark_blue = (0/255, 107/255, 164/255)
cb_orange = (255/255, 128/255, 14/255)
fig = plt.figure(figsize=(12, 12))
for sp in range(0, 4):
    ax = fig.add_subplot(2, 2, sp + 1)
    ax.plot(women_degrees['Year'], women_degrees[major_cats[sp]], c=cb_dark_blue, label='Women')
    ax.plot(women_degrees['Year'], 100 - women_degrees[major_cats[sp]], c=cb_orange, label='Men')
    # Hide all four border lines (spines) of this subplot.
    for spine in ax.spines.values():
        spine.set_visible(False)
    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)
    ax.set_title(major_cats[sp])
    # Use real booleans: newer Matplotlib releases no longer accept the
    # strings "on"/"off" here, and a non-empty string is truthy.
    ax.tick_params(bottom=False, top=False, left=False, right=False)
# pyplot.legend() acts on the current axes, i.e. the last subplot created,
# so only that panel gets a legend.
plt.legend(loc='upper right')
plt.show()
17、加粗线
# One row of four category subplots drawn with thicker lines.
# Relies on `major_cats` and `women_degrees` being defined earlier.
# Custom colours as RGB tuples, each channel scaled from 0-255 to 0-1.
cb_dark_blue = (0/255, 107/255, 164/255)
cb_orange = (255/255, 128/255, 14/255)
fig = plt.figure(figsize=(18, 3))
for sp in range(0, 4):
    ax = fig.add_subplot(1, 4, sp + 1)
    # linewidth sets the thickness of the plotted line.
    ax.plot(women_degrees['Year'], women_degrees[major_cats[sp]], c=cb_dark_blue, label='Women', linewidth=3)
    ax.plot(women_degrees['Year'], 100 - women_degrees[major_cats[sp]], c=cb_orange, label='Men', linewidth=3)
    # Hide all four border lines (spines) of this subplot.
    for spine in ax.spines.values():
        spine.set_visible(False)
    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)
    ax.set_title(major_cats[sp])
    # Use real booleans: newer Matplotlib releases no longer accept the
    # strings "on"/"off" here, and a non-empty string is truthy.
    ax.tick_params(bottom=False, top=False, left=False, right=False)
# pyplot.legend() acts on the current axes, i.e. the last subplot created,
# so only that panel gets a legend.
plt.legend(loc='upper right')
plt.show()
18、加注释
# One row of four category subplots where the lines are labelled with text
# placed directly on the first and last panels instead of using a legend.
# Relies on `major_cats`, `cb_dark_blue`, `cb_orange` and `women_degrees`
# being defined earlier.
fig = plt.figure(figsize=(18, 3))
for sp in range(0, 4):
    ax = fig.add_subplot(1, 4, sp + 1)
    ax.plot(women_degrees['Year'], women_degrees[major_cats[sp]], c=cb_dark_blue, label='Women', linewidth=3)
    ax.plot(women_degrees['Year'], 100 - women_degrees[major_cats[sp]], c=cb_orange, label='Men', linewidth=3)
    # Hide all four border lines (spines) of this subplot.
    for spine in ax.spines.values():
        spine.set_visible(False)
    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)
    ax.set_title(major_cats[sp])
    # Use real booleans: newer Matplotlib releases no longer accept the
    # strings "on"/"off" here, and a non-empty string is truthy.
    ax.tick_params(bottom=False, top=False, left=False, right=False)
    # ax.text(x, y, s) writes s at data coordinates (x, y); the positions
    # are hand-picked for the leftmost and rightmost panels.
    if sp == 0:
        ax.text(2005, 87, 'Men')
        ax.text(2002, 8, 'Women')
    elif sp == 3:
        ax.text(2005, 62, 'Men')
        ax.text(2001, 35, 'Women')
plt.show()