python基础-26-数据分析python——pandas——可视化

本章内容包括:pandas 绘图,以及 matplotlib 的基础绘图、坐标轴标签与子图。



//input1
#!/usr/bin/env python
# coding: utf-8

# In[1]:


import pandas as pd
# Load the two CSV datasets used by the cells below.
df = pd.read_csv('DataAnalyst_utf.csv')
df2 = pd.read_csv('DataAnalystAll_utf.csv')
# Ask IPython to render matplotlib figures inline (only works under IPython/Jupyter).
get_ipython().run_line_magic('matplotlib', 'inline')


# Chart types and the pandas plotting names noted for them:
# - line chart: plot
# - bar chart: bar
# - histogram: hist
# - box plot: box
# - density plot: kde
# - area chart: area
# - scatter plot: scatter
# - scatter-plot matrix: scatter_matrix
# - pie chart: pie

# In[2]:


df.head()


# In[3]:


# Line chart of the raw `avg` column, plotted against the row index.
df['avg'].plot()


# In[4]:


# How often each distinct `avg` value occurs, ordered by the value itself.
avg_counts = df['avg'].value_counts().sort_index()
avg_counts.plot()


# In[5]:


# The same frequencies drawn as a bar chart.
avg_counts.plot.bar()


# In[6]:


# Number of non-null `avg` values for every city/education pair:
# rows are cities, columns are education levels.
city_by_education = df.pivot_table(
    index='city',
    columns='education',
    values='avg',
    aggfunc='count',
)
# Vertical stacked bars, one bar per city.
city_by_education.plot.bar(stacked=True)


# In[7]:


# Same table as horizontal stacked bars.
city_by_education.plot.barh(stacked=True)


# In[8]:


# Histogram of the whole `avg` column.
df['avg'].plot.hist()


# In[9]:


# Pull `avg` out of each education group and reshape the result so the
# groups end up as columns (presumably one column per education level;
# verify against the installed pandas' groupby-apply output shape).
avg_by_education = df.groupby('education').apply(lambda group: group.avg).unstack().T
# Keyword arguments shared by the two stacked histograms below.
stacked_hist_options = dict(alpha=0.8, stacked=True, bins=30)
avg_by_education.plot.hist(**stacked_hist_options)


# In[10]:


# Same stacked histogram, drawn with horizontal bars.
avg_by_education.plot.hist(orientation='horizontal', **stacked_hist_options)


# In[11]:


# Box plot of `avg`, one box per education level.
df.boxplot(column='avg', by='education')


# In[12]:


# Kernel density estimate of `avg`.
df['avg'].plot.kde()


# In[13]:


# Per-company mean, count and max of `avg`.
# The column is selected *before* aggregating: aggregating every column and
# picking `.avg` afterwards computes statistics that are thrown away, and in
# pandas >= 2.0 `mean` raises TypeError on non-numeric columns
# (NOTE(review): assumes df2 contains such columns, as df does with
# 'city'/'education' — confirm against the CSV).
matrix = df2.groupby('companyId')['avg'].aggregate(['mean', 'count', 'max'])
# Scatter plot of each company's maximum against its mean.
matrix.plot.scatter(x='mean', y='max')


# In[14]:


# Restrict to companies whose `count` is below 50; computed once and reused
# by both scatter matrices.
under_50 = matrix.query('count < 50')
pd.plotting.scatter_matrix(under_50)


# In[15]:


# Same scatter matrix with kernel density estimates on the diagonal.
pd.plotting.scatter_matrix(under_50, diagonal='kde')





//output1

以下是按顺序依次输出的图。


1

2

3

4

5
#!/usr/bin/env python
# coding: utf-8

# In[2]:


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Ask IPython to render matplotlib figures inline (only works under IPython/Jupyter).
get_ipython().run_line_magic('matplotlib', 'inline')


# In[3]:


df = pd.read_csv('DataAnalyst_utf.csv')


# In[4]:


# Use the SimHei font so the Chinese title and axis label below can be rendered.
plt.rcParams['font.sans-serif'] = ['SimHei']
# Draw negative tick labels with an ASCII hyphen. With a CJK font such as
# SimHei the Unicode minus sign typically shows up as a missing-glyph box,
# and the data plotted below includes negative values. This also matches the
# setting used by the other notebooks in this file.
plt.rcParams['axes.unicode_minus'] = False


# In[8]:


plt.figure(1, figsize=(10, 4))
# 20 random integers in [-20, 20]. `np.random.random_integers` is deprecated;
# `randint` excludes its upper bound, hence 21.
plt.plot(np.random.randint(-20, 21, 20))
plt.title('这是一条折线图')
# NOTE(review): the plotted data only covers x = 0..19, so the tick at 30 lies
# beyond it — confirm this is intended.
plt.xticks([0, 10, 30])
plt.xlabel('x轴')
plt.show()


# Parts of a figure:
# - title
# - data
# - x axis
#     - x-axis ticks: xtick
#     - x-axis label: xlabel
# - y axis

轴线标签
#!/usr/bin/env python
# coding: utf-8

# In[25]:


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Ask IPython to render matplotlib figures inline (only works under IPython/Jupyter).
get_ipython().run_line_magic('matplotlib', 'inline')


# In[26]:


df = pd.read_csv('DataAnalyst_utf.csv')


# In[27]:


# Use the SimHei font so the Chinese axis labels below can be rendered, and
# draw the minus sign as an ASCII hyphen.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False


# In[35]:


# Mean and count of `avg` for every city/education pair, flattened so that
# 'city' and 'education' become ordinary columns.
# The column is selected *before* aggregating: in pandas >= 2.0, `mean` over
# all columns raises TypeError as soon as a non-numeric column is present.
data = df.groupby(['city', 'education'])['avg'].aggregate(['mean', 'count']).reset_index()
data.head()


# In[37]:


# One scatter series per city: x is the mean of `avg`, y is the count.
for city, grouped in data.groupby('city'):
    plt.scatter(grouped['mean'], grouped['count'], label=city)
plt.xlabel('平均薪资')
plt.ylabel('招聘数')
# Single legend call. A second bare `plt.legend()` would replace this one and
# silently drop the requested location.
plt.legend(loc='upper right')
plt.show()


# In[ ]:





图片
#!/usr/bin/env python
# coding: utf-8

# In[2]:


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Ask IPython to render matplotlib figures inline (only works under IPython/Jupyter).
get_ipython().run_line_magic('matplotlib', 'inline')


# In[3]:


df = pd.read_csv('DataAnalyst_utf.csv')


# In[4]:


# Use the SimHei font for Chinese text and draw the minus sign as an ASCII hyphen.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False


# In[7]:


# Every series below is 20 random integers in [-20, 20].
# `np.random.random_integers` is deprecated; `randint` excludes its upper
# bound, hence 21.
plt.figure(figsize=(12, 4))

# First subplot: position 1 of a 2x2 grid.
plt.subplot(221)
plt.plot(np.random.randint(-20, 21, 20), label='1')
plt.plot(np.random.randint(-20, 21, 20), label='2')
plt.legend()  # show the series labels

# Second subplot: position 2 of a 2x2 grid.
plt.subplot(222)
plt.plot(np.random.randint(-20, 21, 20), label='3')
plt.plot(np.random.randint(-20, 21, 20), label='4')
plt.legend()  # show the series labels


# Third subplot: position 2 of a 2x1 grid.
plt.subplot(212)
plt.plot(np.random.randint(-20, 21, 20), label='5')
plt.legend()  # show the series labels


# A second, separate figure.
plt.figure(figsize=(12, 4))

# Its only subplot: position 1 of a 2x2 grid.
plt.subplot(221)
plt.plot(np.random.randint(-20, 21, 20), label='1')
plt.show()


# In[11]:


# Mean and count of `avg` for every city/education pair, flattened so that
# 'city' and 'education' become ordinary columns.
# The column is selected *before* aggregating: in pandas >= 2.0, `mean` over
# all columns raises TypeError as soon as a non-numeric column is present.
data = df.groupby(['city', 'education'])['avg'].aggregate(['mean', 'count']).reset_index()


# In[12]:


plt.figure(figsize=(12, 4))

# Left panel: 20 random integers in [-20, 20].
# `np.random.random_integers` is deprecated; `randint` excludes its upper
# bound, hence 21.
plt.subplot(121)
plt.plot(np.random.randint(-20, 21, 20), label='1')

# Right panel: selected once, before the loop, so the labels and legend below
# always land on it (even if `data` is empty).
plt.subplot(122)
# One scatter series per city, drawn exactly once. Drawing it twice would
# double every point and give each city two legend entries.
for city, grouped in data.groupby('city'):
    plt.scatter(grouped['mean'], grouped['count'], label=city)
plt.xlabel('平均薪资')
plt.ylabel('招聘数')
# Single legend call. A second bare `plt.legend()` would replace this one and
# silently drop the requested location.
plt.legend(loc='upper right')
plt.show()


# In[ ]:
图片1
图片2

你可能感兴趣的:(python基础-26-数据分析python——pandas——可视化)