【python 可视化】数据透视表的用法及画饼图

【python 可视化】数据透视表的用法及画饼图_第1张图片

【python 可视化】数据透视表的用法及画饼图_第2张图片

【python 可视化】数据透视表的用法及画饼图_第3张图片

【python 可视化】数据透视表的用法及画饼图_第4张图片

【python 可视化】数据透视表的用法及画饼图_第5张图片

【python 可视化】数据透视表的用法及画饼图_第6张图片

# encoding: utf-8

################### Set the default string encoding to UTF-8 ##############
# Python 2 only. site.py removes sys.setdefaultencoding at startup, so the
# module has to be reloaded before the function can be called. Python 3 has
# no reload() builtin and no setdefaultencoding (its default is already
# UTF-8), so the hack is skipped there instead of raising NameError.
import sys
if sys.version_info[0] < 3:
    reload(sys)
    sys.setdefaultencoding('utf-8')

####################导入包########
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
##########设置中文显示
from pylab import *
import pandas as pd
# Global matplotlib text settings, applied to every figure drawn below.
# `mpl` is provided by the `from pylab import *` line above.
font_size = 11  # base font size for all figure text
# SimHei is selected as the sans-serif font so that Chinese text can be displayed.
mpl.rcParams['font.sans-serif'] = ['SimHei']
mpl.rcParams['font.size'] = font_size



################# Load the records from the CSV file
data = pd.read_csv("C:/result.csv")


################ 1. Per-doctor recommendation score: mean score and row count ################
# One row per doctor_name. aggfunc is [mean, len], so column 0 of the pivot
# is the mean of doctor_score and column 1 is the number of rows.
a1 = pd.pivot_table(
    data=data,
    index=['doctor_name'],
    values=['doctor_score'],
    fill_value=0,
    aggfunc=[np.mean, len],
)
# Flatten the pivot into plainly named columns.
a2 = pd.DataFrame({
    "doctor_name": a1.index,
    "score_mean": a1.iloc[:, 0],
    "num": a1.iloc[:, 1],
})

########### Order doctors by mean score, highest first, and add each
########### doctor's share of the total row count.
a3 = a2.sort_values(by='score_mean', ascending=False)
a3['percent'] = a3['num'] / sum(a3['num'])


######################### 1. Row totals per mean-score band
# Each value is the sum of 'num' over the doctors whose mean score falls in
# the band. Bands are closed on the left and open on the right.

# mean score >= 4.5
a5 = sum(a3.loc[a3['score_mean'] >= 4.5, 'num'])

# 4.0 <= mean score < 4.5
a6 = sum(a3.loc[(a3['score_mean'] >= 4.0) & (a3['score_mean'] < 4.5), 'num'])

# 3.5 <= mean score < 4.0
a7 = sum(a3.loc[(a3['score_mean'] >= 3.5) & (a3['score_mean'] < 4.0), 'num'])

# mean score < 3.5
a8 = sum(a3.loc[a3['score_mean'] < 3.5, 'num'])



# ########################## Pie chart 1 ############################
# Row totals per mean-score band, in the same order as `labels`.
# Deliberately NOT stored in `data`: that name holds the DataFrame loaded from
# the CSV, and the department / job-title sections further down still call
# .groupby() and .sort_values() on it. Rebinding it to a list made those
# calls fail.
score_band_counts = [a5, a6, a7, a8]

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(score_band_counts)
labels=['4.5-5.0','4.0-4.5','3.5-4.0','3.5以下']
cols = ['blue','yellowgreen', 'gold', 'lightskyblue', 'lightcoral','orange','pink']   # wedge colours
plt.axis('equal')  # equal axis scaling so the pie is drawn as a circle
plt.pie(score_band_counts, labels=labels, autopct='%1.1f%%',colors=cols)
plt.title(u'消费者总体认可度分布图')
plt.savefig("D:/a1.png")
plt.show()






####################### 2. Group the records by department: count and share per department
# Number of non-null doctor_score values per doctor_department.
b1 = pd.DataFrame(data.groupby(['doctor_department'])['doctor_score'].count())
# Largest departments first.
b2 = b1.sort_values(by='doctor_score', ascending=False)
# Department labels with the hospital-name prefix removed, in the same order
# as the rows of b2.
department = [str(each).replace('华西医院 ', '') for each in b2.index]
b3 = pd.DataFrame({'department': department, "num": b2.iloc[:, 0]})
# Each department's fraction of the total count.
b3['percent'] = b3['num'] / sum(b3['num'])
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(b3)

########################## Pie chart 2 ############################
# Counts and labels of the ten largest departments (b3 is already sorted by
# count, descending). The counts are kept out of `data` on purpose: the
# job-title section below still needs `data` to be the DataFrame loaded from
# the CSV, and rebinding it to this Series made data.groupby(['doctor_job'])
# fail.
top10_departments = b3.head(10)
department_counts = top10_departments['num']
labels = top10_departments['department']
cols = ['c','m','red','blue','yellowgreen', 'gold', 'lightskyblue', 'lightcoral','orange','pink']   # wedge colours
plt.axis('equal')  # equal axis scaling so the pie is drawn as a circle
plt.pie(department_counts, labels=labels, autopct='%1.1f%%',colors=cols)
plt.title(u'top10各科室在电话咨询板块参与度分布图')
plt.savefig("D:/a2.png")
plt.show()

#
####################### 3. Number of doctors, and share, per job title ###############################

# Number of non-null doctor_score values per doctor_job.
c1 = pd.DataFrame(data.groupby(['doctor_job'])['doctor_score'].count())
# Most common job titles first.
c2 = c1.sort_values(by='doctor_score', ascending=False)
c3 = c2.index
# Job-title labels in the same order as the rows of c2; newline characters in
# a title are replaced with '未知' ("unknown").
doctor = [str(each).replace("\n", '未知') for each in c3]
c4 = pd.DataFrame({'doctor': doctor, "num": c2.iloc[:, 0]})
# Each job title's fraction of the total count.
c4['percent'] = c4['num'] / sum(c4['num'])

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(c4)

########################## Pie chart 3 ############################
# Counts and labels per job title. The counts are kept out of `data` on
# purpose: the section below sorts the DataFrame loaded from the CSV with
# data.sort_values(by='doctor_score'), which fails once `data` has been
# rebound to this Series.
job_title_counts = c4['num']
labels = c4['doctor']
cols = ['blue','yellowgreen', 'gold', 'lightskyblue', 'lightcoral','orange','pink']   # wedge colours
plt.axis('equal')  # equal axis scaling so the pie is drawn as a circle
plt.pie(job_title_counts, labels=labels, autopct='%1.1f%%',colors=cols)
plt.title(u'每种职称下的医生人数在电话咨询板块医生整体水平分布图')
plt.savefig("D:/a3.png")
plt.show()





############################# 4. Specialties of the three highest-scored doctors per department

# The three departments examined below, written with the hospital prefix,
# i.e. the same strings the per-department filters compare doctor_department
# against.
# NOTE(review): presumably transcribed from b3['department'].head(3). The
# script used to compute that value into d1 and immediately overwrite it with
# this literal list, so the dead assignment has been dropped.
d1=['华西医院 神经外科','华西医院 肝胆外科','华西医院 心血管内科']

# All records ordered by doctor_score, highest first.
data_sort=data.sort_values(by='doctor_score',ascending=False)



################################## top1: first department
import re
d2=data_sort[data_sort['doctor_department']=='华西医院 神经外科']
# Fifth column (position 4) of the three highest-scored rows in this department.
# NOTE(review): presumably free text describing each doctor's specialties
# with case counts - confirm against the CSV schema.
d3= d2.head(3).iloc[:,4]
# For each of those cells, echo the text and the runs of digits found in it,
# and collect the digit runs in k. The regex is a raw string so that \d is
# not treated as a string escape, and it is evaluated once per cell.
k=[]
for each in d3:
    numbers = re.findall(r'\d+', each, re.S)
    print(each)
    print(numbers)
    k.append(numbers)


# Hard-coded wedge sizes and their labels.
# NOTE(review): presumably copied by hand from the output printed above; k is
# not used for the plot.
data3=['22', '19', '3', '1']


labels=['脑肿瘤','椎管内肿瘤','听神经瘤','脑膜瘤']

cols = ['blue','yellowgreen', 'gold', 'lightskyblue', 'lightcoral','orange','pink']   # wedge colours
plt.axis('equal')  # equal axis scaling so the pie is drawn as a circle
plt.pie(data3, labels=labels, autopct='%1.1f%%',colors=cols)
plt.title(u'top1华西医院 神经外科兰志刚医生擅长的类型比例分布图')
plt.savefig("D:/a5.png")
plt.show()



############################# top2: second department

import re
d2=data_sort[data_sort['doctor_department']=='华西医院 肝胆外科']
# Fifth column (position 4) of the three highest-scored rows in this department.
# NOTE(review): presumably free text describing each doctor's specialties
# with case counts - confirm against the CSV schema.
d3= d2.head(3).iloc[:,4]
# For each of those cells, echo the text and the runs of digits found in it,
# and collect the digit runs in k. The regex is a raw string so that \d is
# not treated as a string escape, and it is evaluated once per cell.
k=[]
for each in d3:
    numbers = re.findall(r'\d+', each, re.S)
    print(each)
    print(numbers)
    k.append(numbers)


# Hard-coded wedge sizes and their labels.
# NOTE(review): presumably copied by hand from the output printed above; k is
# not used for the plot.
data4=['26', '10', '5', '2']


labels=['肝癌','胆结石','肝血管瘤','肝病']

cols = ['blue','yellowgreen', 'gold', 'lightskyblue', 'lightcoral','orange','pink']   # wedge colours
plt.axis('equal')  # equal axis scaling so the pie is drawn as a circle
plt.pie(data4, labels=labels, autopct='%1.1f%%',colors=cols)
plt.title(u'top2华西医院 肝胆外科熊先泽医生擅长的类型比例分布图')
plt.savefig("D:/a6.png")
plt.show()


######################### top3: third department #########################


import re
d2=data_sort[data_sort['doctor_department']=='华西医院 心血管内科']
# Fifth column (position 4) of the three highest-scored rows in this department.
# NOTE(review): presumably free text describing each doctor's specialties
# with case counts - confirm against the CSV schema.
d3= d2.head(3).iloc[:,4]
# For each of those cells, echo the text and the runs of digits found in it,
# and collect the digit runs in k. The regex is a raw string so that \d is
# not treated as a string escape, and it is evaluated once per cell.
k=[]
for each in d3:
    numbers = re.findall(r'\d+', each, re.S)
    print(each)
    print(numbers)
    k.append(numbers)


# Hard-coded wedge sizes and their labels.
# NOTE(review): presumably copied by hand from the output printed above; k is
# not used for the plot.
data4=['99', '5', '4', '4']


labels=['冠心病','心脏介入','高血压','心脏病']

cols = ['blue','yellowgreen', 'gold', 'lightskyblue', 'lightcoral','orange','pink']   # wedge colours
plt.axis('equal')  # equal axis scaling so the pie is drawn as a circle
plt.pie(data4, labels=labels, autopct='%1.1f%%',colors=cols)
plt.title(u'top3华西医院 心血管内科贺勇医生擅长的类型比例分布图')
plt.savefig("D:/a7.png")
plt.show()

你可能感兴趣的:(数据科学--python)