本章内容包括:pandas绘图
//input1
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import pandas as pd
# Load the two CSV datasets used by the plotting cells below.
df, df2 = (
    pd.read_csv(csv_path)
    for csv_path in ('DataAnalyst_utf.csv', 'DataAnalystAll_utf.csv')
)
# IPython-only: show matplotlib figures inline in the notebook output.
get_ipython().run_line_magic('matplotlib', 'inline')
# Chart types listed for this chapter (chart name, then the plot method/kind):
# - line chart: plot
# - bar chart: bar
# - histogram: hist
# - box plot: box
# - density plot: kde
# - area chart: area
# - scatter plot: scatter
# - scatter-plot matrix: scatter_matrix
# - pie chart: pie
# In[2]:
# Preview the first rows of the dataset.
df.head()
# In[3]:
# Line chart of the raw `avg` column, in row order.
avg_values = df['avg']
avg_values.plot()
# In[4]:
# How often each distinct `avg` value occurs, ordered by value, as a line.
avg_frequency = avg_values.value_counts().sort_index()
avg_frequency.plot()
# In[5]:
# The same frequencies as a bar chart.
avg_frequency.plot.bar()
# In[6]:
# Count of `avg` entries per city (rows) and education level (columns),
# drawn as stacked vertical bars.
counts_by_city = df.pivot_table(
    values='avg',
    index='city',
    columns='education',
    aggfunc='count',
)
counts_by_city.plot.bar(stacked=True)
# In[7]:
# The same table as stacked horizontal bars.
counts_by_city.plot.barh(stacked=True)
# In[8]:
# Histogram of `avg` with the default binning.
avg_values.plot.hist()
# In[9]:
# One column of `avg` values per education level, so the histogram can stack
# the levels on top of each other.
avg_per_education = df.groupby('education').apply(lambda grp: grp.avg).unstack().T
hist_options = dict(alpha=0.8, stacked=True, bins=30)
avg_per_education.plot.hist(**hist_options)
# In[10]:
# The same stacked histogram, drawn horizontally.
avg_per_education.plot.hist(orientation='horizontal', **hist_options)
# In[11]:
# Box plot of `avg`, one box per education level.
df.boxplot(column='avg', by='education')
# In[12]:
# Kernel density estimate of `avg`.
avg_values.plot.kde()
# In[13]:
# Per-company summary of the `avg` column: mean, non-null count and maximum.
# The column is selected *before* aggregating so that only `avg` is processed;
# aggregating every column first is wasted work and makes recent pandas
# versions raise TypeError when any other column is non-numeric.
matrix = df2.groupby('companyId')['avg'].agg(['mean', 'count', 'max'])
matrix.plot.scatter(x='mean', y='max')
# In[14]:
# Keep only companies whose `count` is below 50, then plot every pairwise
# relationship between mean / count / max (histograms on the diagonal).
small_companies = matrix.query('count < 50')
pd.plotting.scatter_matrix(small_companies)
# In[15]:
# Same scatter matrix, with kernel density estimates on the diagonal.
pd.plotting.scatter_matrix(small_companies, diagonal='kde')
//output1
以下是按顺序依次输出的图。
#!/usr/bin/env python
# coding: utf-8
# In[2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# IPython-only: show matplotlib figures inline in the notebook output.
get_ipython().run_line_magic('matplotlib', 'inline')
# In[3]:
df = pd.read_csv('DataAnalyst_utf.csv')
# In[4]:
# Use the SimHei font so the Chinese title and axis labels can be rendered.
plt.rcParams['font.sans-serif'] = ['SimHei']
# Draw negative numbers with the ASCII hyphen instead of the Unicode minus
# sign (U+2212). CJK fonts such as SimHei commonly lack that glyph, so leaving
# this enabled shows negative tick labels as empty boxes.
plt.rcParams['axes.unicode_minus'] = False
# In[8]:
# Figure number 1, 10 x 4 inches.
plt.figure(1, figsize=(10, 4))
# 20 random integers drawn from [-20, 20]. `np.random.random_integers` is
# deprecated; `randint` excludes its upper bound, hence 21.
plt.plot(np.random.randint(-20, 21, 20))
plt.title('这是一条折线图')
# NOTE(review): the data only spans x = 0..19, so the tick at 30 lies beyond
# the plotted points -- confirm this is intended for the demo.
plt.xticks([0,10,30])
plt.xlabel('x轴')
plt.show()
# Elements of a figure:
# - title
# - data
# - x axis
# - x-axis ticks: xtick
# - x-axis label: xlabel
# - y axis
#!/usr/bin/env python
# coding: utf-8
# In[25]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# IPython-only: show matplotlib figures inline in the notebook output.
get_ipython().run_line_magic('matplotlib', 'inline')
# In[26]:
df = pd.read_csv('DataAnalyst_utf.csv')
# In[27]:
# Font settings for Chinese labels: SimHei as the sans-serif font, and the
# ASCII hyphen (not the Unicode minus sign) for negative numbers.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
# In[35]:
# Mean and non-null count of `avg` for every (city, education) pair, flattened
# back into ordinary columns: city, education, mean, count.
# `avg` is selected before aggregating so no other column is processed;
# aggregating all columns first makes recent pandas versions raise TypeError
# when any of them is non-numeric.
data = (
    df.groupby(['city', 'education'])['avg']
    .agg(['mean', 'count'])
    .reset_index()
)
data.head()
# In[37]:
# One scatter series per city: x is the mean of `avg`, y is the row count,
# with one point per education level present for that city.
for city, grouped in data.groupby('city'):
    x = grouped['mean']
    y = grouped['count']
    plt.scatter(x, y, label=city)
# A single legend call: a second bare `plt.legend()` would replace this one
# and discard the explicit placement.
plt.legend(loc='upper right')
plt.xlabel('平均薪资')
plt.ylabel('招聘数')
plt.show()
# In[ ]:
#!/usr/bin/env python
# coding: utf-8
# In[2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# IPython-only: show matplotlib figures inline in the notebook output.
get_ipython().run_line_magic('matplotlib', 'inline')
# In[3]:
df = pd.read_csv('DataAnalyst_utf.csv')
# In[4]:
# Font settings for Chinese labels: SimHei as the sans-serif font, and the
# ASCII hyphen (not the Unicode minus sign) for negative numbers.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
# In[7]:
# Every line below is 20 random integers drawn from [-20, 20].
# `np.random.random_integers` is deprecated; `randint` excludes its upper
# bound, hence 21.
plt.figure(figsize=(12, 4))
# First subplot: position 1 of a 2x2 grid (top-left), two lines.
plt.subplot(221)
plt.plot(np.random.randint(-20, 21, 20), label='1')
plt.plot(np.random.randint(-20, 21, 20), label='2')
plt.legend()  # legend built from the `label` values
# Second subplot: position 2 of the 2x2 grid (top-right), two lines.
plt.subplot(222)
plt.plot(np.random.randint(-20, 21, 20), label='3')
plt.plot(np.random.randint(-20, 21, 20), label='4')
plt.legend()  # legend built from the `label` values
# Third subplot: position 2 of a 2x1 grid, i.e. the whole bottom row.
plt.subplot(212)
plt.plot(np.random.randint(-20, 21, 20), label='5')
plt.legend()  # legend built from the `label` values
# A second, separate figure with only the top-left cell of a 2x2 grid drawn.
plt.figure(figsize=(12, 4))
# First subplot of the new figure.
plt.subplot(221)
plt.plot(np.random.randint(-20, 21, 20), label='1')
plt.show()
# In[11]:
# Mean and non-null count of `avg` for every (city, education) pair, flattened
# back into ordinary columns: city, education, mean, count.
# `avg` is selected before aggregating so no other column is processed;
# aggregating all columns first makes recent pandas versions raise TypeError
# when any of them is non-numeric.
data = df.groupby(['city', 'education'])['avg'].agg(['mean', 'count']).reset_index()
# In[12]:
plt.figure(figsize=(12, 4))
# Left panel: 20 random integers drawn from [-20, 20].
# `np.random.random_integers` is deprecated; `randint` excludes its upper
# bound, hence 21.
plt.subplot(121)
plt.plot(np.random.randint(-20, 21, 20), label='1')
# Right panel: selected once, before the loop, so every scatter series, the
# legend and the axis labels all land on the same axes.
plt.subplot(122)
# One scatter series per city: x is the mean of `avg`, y is the row count.
# Each city is drawn exactly once so the legend has one entry per city.
for city, grouped in data.groupby('city'):
    x = grouped['mean']
    y = grouped['count']
    plt.scatter(x, y, label=city)
# A single legend call: a second bare `plt.legend()` would replace this one
# and discard the explicit placement.
plt.legend(loc='upper right')
plt.xlabel('平均薪资')
plt.ylabel('招聘数')
plt.show()
# In[ ]: