import sys
reload(sys)
sys.setdefaultencoding('utf-8')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pylab import *
import pandas as pd
import re

# ---------------------------------------------------------------------------
# Doctor-score report: reads the consultation CSV and renders six pie charts.
#
# The loaded DataFrame is never rebound to chart data; the original script
# reused the name `data` for both, which broke every later groupby/sort.
# ---------------------------------------------------------------------------

INPUT_CSV = "C:/result.csv"
FONT_SIZE = 11

# Default colour cycle shared by most charts.
PIE_COLORS = ['blue', 'yellowgreen', 'gold', 'lightskyblue', 'lightcoral',
              'orange', 'pink']
# The top-10 department chart needs ten distinct colours.
TOP10_COLORS = ['c', 'm', 'red', 'blue', 'yellowgreen', 'gold',
                'lightskyblue', 'lightcoral', 'orange', 'pink']

# Prefix stripped from department names before they are used as pie labels.
HOSPITAL_PREFIX = '华西医院 '

# Positional index of the CSV column whose text is scanned for numbers.
# NOTE(review): the column name is not visible in this script; presumably it
# holds the doctor's specialty summary -- confirm against the CSV header.
SPECIALTY_COLUMN_POS = 4

# (department, slice values, slice labels, title, output path) for the three
# per-doctor charts.
# NOTE(review): the slice values were hard-coded in the original script,
# presumably transcribed from the numbers printed by print_top_specialties();
# they are not recomputed from the CSV.
TOP_DOCTOR_CHARTS = [
    ('华西医院 神经外科',
     [22, 19, 3, 1],
     ['脑肿瘤', '椎管内肿瘤', '听神经瘤', '脑膜瘤'],
     u'top1华西医院 神经外科兰志刚医生擅长的类型比例分布图',
     "D:/a5.png"),
    ('华西医院 肝胆外科',
     [26, 10, 5, 2],
     ['肝癌', '胆结石', '肝血管瘤', '肝病'],
     u'top2华西医院 肝胆外科熊先泽医生擅长的类型比例分布图',
     "D:/a6.png"),
    ('华西医院 心血管内科',
     [99, 5, 4, 4],
     ['冠心病', '心脏介入', '高血压', '心脏病'],
     u'top3华西医院 心血管内科贺勇医生擅长的类型比例分布图',
     "D:/a7.png"),
]

_DIGITS = re.compile(r'\d+')


def configure_fonts():
    """Select the SimHei font and the base font size for all charts."""
    mpl.rcParams['font.sans-serif'] = ['SimHei']
    mpl.rcParams['font.size'] = FONT_SIZE


def save_pie(values, labels, title, path, colors=None):
    """Draw one pie chart on a fresh figure, save it to *path* and show it.

    values -- slice sizes (numeric sequence or Series)
    labels -- one label per slice
    title  -- chart title
    path   -- image file written by ``plt.savefig``
    colors -- colour cycle; defaults to PIE_COLORS
    """
    if colors is None:
        colors = PIE_COLORS
    # A new figure per chart keeps earlier pies from being drawn over when
    # the backend does not discard the figure on show().
    plt.figure()
    plt.axis('equal')
    plt.pie(values, labels=labels, autopct='%1.1f%%', colors=colors)
    plt.title(title)
    plt.savefig(path)
    plt.show()


def summarize_doctor_scores(records):
    """Return one row per doctor with mean score, row count and share.

    The result has columns ``doctor_name``, ``score_mean``, ``num`` and
    ``percent`` (``num`` divided by the total of ``num``), sorted by
    ``score_mean`` in descending order.
    """
    table = pd.pivot_table(data=records, index=['doctor_name'],
                           values=['doctor_score'], fill_value=0,
                           aggfunc=[np.mean, len])
    summary = pd.DataFrame({"doctor_name": table.index,
                            "score_mean": table.iloc[:, 0],
                            "num": table.iloc[:, 1]})
    summary = summary.sort_values(by='score_mean', ascending=False)
    summary['percent'] = summary['num'] / summary['num'].sum()
    return summary


def score_band_counts(summary):
    """Sum ``num`` within the four mean-score bands.

    Returns ``[>=4.5, 4.0 to <4.5, 3.5 to <4.0, <3.5]`` as plain ints.
    """
    mean = summary['score_mean']
    num = summary['num']
    bands = [
        mean >= 4.5,
        (mean >= 4.0) & (mean < 4.5),
        (mean >= 3.5) & (mean < 4.0),
        mean < 3.5,
    ]
    return [int(num[band].sum()) for band in bands]


def count_by(records, column):
    """Count ``doctor_score`` entries per value of *column*, largest first.

    Returns a one-column DataFrame (``doctor_score``) indexed by *column*.
    """
    counts = pd.DataFrame(records.groupby(column)['doctor_score'].count())
    return counts.sort_values(by='doctor_score', ascending=False)


def participation_table(counts, label_column, relabel):
    """Build a label / num / percent table from a count_by() result.

    counts       -- DataFrame whose first column holds the counts
    label_column -- name of the label column in the result
    relabel      -- function mapping ``str(index value)`` to a display label
    """
    labels = [relabel(str(each)) for each in counts.index]
    table = pd.DataFrame({label_column: labels, "num": counts.iloc[:, 0]})
    table['percent'] = table['num'] / table['num'].sum()
    return table


def extract_numbers(text):
    """Return every run of digits found in *text* as a list of strings."""
    # str() lets a non-string cell (e.g. a missing value) yield [] instead
    # of raising TypeError inside the regex engine.
    return _DIGITS.findall(str(text))


def print_top_specialties(ranked, department, top_n=3):
    """Print the text and its digit runs for a department's first rows.

    ranked     -- records already sorted by ``doctor_score`` descending
    department -- exact ``doctor_department`` value to filter on
    top_n      -- how many leading rows to inspect

    Returns the list of digit-run lists, one per inspected row.
    """
    rows = ranked[ranked['doctor_department'] == department]
    extracted = []
    for text in rows.head(top_n).iloc[:, SPECIALTY_COLUMN_POS]:
        numbers = extract_numbers(text)
        print(text)
        print(numbers)
        extracted.append(numbers)
    return extracted


def main():
    """Load the CSV and produce all charts in the original order."""
    configure_fonts()
    records = pd.read_csv(INPUT_CSV)

    # Chart 1: rows per mean-score band.
    band_counts = score_band_counts(summarize_doctor_scores(records))
    print(band_counts)
    save_pie(band_counts,
             ['4.5-5.0', '4.0-4.5', '3.5-4.0', '3.5以下'],
             u'消费者总体认可度分布图',
             "D:/a1.png")

    # Chart 2: the ten departments with the most scored rows.
    departments = participation_table(
        count_by(records, 'doctor_department'),
        'department',
        lambda name: name.replace(HOSPITAL_PREFIX, ''))
    print(departments)
    top10 = departments.head(10)
    save_pie(top10['num'], top10['department'],
             u'top10各科室在电话咨询板块参与度分布图',
             "D:/a2.png",
             colors=TOP10_COLORS)

    # Chart 3: scored rows per job title; newlines in a title are replaced
    # with the placeholder text used by the original script.
    jobs = participation_table(
        count_by(records, 'doctor_job'),
        'doctor',
        lambda name: name.replace("\n", '未知'))
    print(jobs)
    save_pie(jobs['num'], jobs['doctor'],
             u'每种职称下的医生人数在电话咨询板块医生整体水平分布图',
             "D:/a3.png")

    # Charts 5-7: one chart per hard-coded top department.
    ranked = records.sort_values(by='doctor_score', ascending=False)
    for department, values, labels, title, path in TOP_DOCTOR_CHARTS:
        print_top_specialties(ranked, department)
        save_pie(values, labels, title, path)


if __name__ == "__main__":
    main()