# Load the training data; the CSV file is decoded as GBK.
df_train = pd.read_csv("data.csv", encoding="gbk")

# Bare expression: presumably a leftover notebook-cell echo of the frame.
df_train

# Print the column labels, then the summary statistics of the 房价 column.
print(df_train.columns)
print(df_train["房价"].describe())
# Apply seaborn's theme with the font named by `myfont`
# (presumably a font able to render the Chinese labels below - TODO confirm
# against where `myfont` is defined).
seaborn.set(font=myfont.get_name())

# Histogram of 房价 (counts only, no KDE curve) on a 16x8 inch, 600 dpi figure.
# NOTE(review): seaborn.distplot is deprecated in recent seaborn releases; it
# is kept here so the binning and the rendered output stay unchanged.
plt.figure(figsize=(16, 8), dpi=600)
seaborn.distplot(
    df_train["房价"],
    kde=False,
    hist_kws={"label": "频数"},
)

# Decorate the axes the histogram was drawn on.
plt.title("房价分布")
plt.ylabel("频数")
plt.legend()
# Scatter plot of 房价 (y axis) against 居住面积 (x axis).
data = df_train[["房价", "居住面积"]]
plt.figure(figsize=(16, 8), dpi=600)
# x and y are given by keyword: from seaborn 0.12 on, every scatterplot
# argument after `data` is keyword-only, so passing the two Series
# positionally raises TypeError. The keyword form works on older releases too.
seaborn.scatterplot(data=data, x="居住面积", y="房价")
# Scatter plot of 房价 (y axis) against 地下室总面积 (x axis).
data = df_train[["房价", "地下室总面积"]]
plt.figure(figsize=(16, 8), dpi=600)
# x and y are given by keyword: from seaborn 0.12 on, every scatterplot
# argument after `data` is keyword-only, so passing the two Series
# positionally raises TypeError. The keyword form works on older releases too.
seaborn.scatterplot(data=data, x="地下室总面积", y="房价")
# Box plot of 房价, one box per distinct value of 材料和质量.
data = df_train[["房价", "材料和质量"]]
plt.figure(figsize=(16, 8), dpi=600)
seaborn.boxplot(x="材料和质量", y="房价", data=data)
# Box plot of 房价, one box per distinct value of 原施工日期.
data = df_train[["房价", "原施工日期"]]
plt.figure(figsize=(16, 8), dpi=600)
# Request vertical x tick labels. The call is deliberately left ahead of the
# plotting call: pyplot state is order-sensitive, so the sequence is preserved.
plt.xticks(rotation=90)
seaborn.boxplot(x="原施工日期", y="房价", data=data)
# Box plot of 房价, one box per distinct value of 街区.
data = df_train[["房价", "街区"]]
plt.figure(figsize=(16, 8), dpi=600)
# Request vertical x tick labels. The call is deliberately left ahead of the
# plotting call: pyplot state is order-sensitive, so the sequence is preserved.
plt.xticks(rotation=90)
seaborn.boxplot(x="街区", y="房价", data=data)
# Pairwise correlation matrix of the numeric columns, drawn as a heat map.
# Non-numeric columns are dropped explicitly before calling corr(): since
# pandas 2.0, DataFrame.corr() defaults to numeric_only=False and raises on
# string columns instead of skipping them. select_dtypes() is used rather than
# corr(numeric_only=True) so the line also runs on pandas versions that predate
# that parameter.
corrmat = df_train.select_dtypes(include=["number", "bool"]).corr()
plt.figure(figsize=(16, 8), dpi=600)
seaborn.heatmap(
    corrmat,
    square=True,
    cmap="YlGnBu",
    xticklabels=True,
    yticklabels=True,
)
# Find the 10 features whose correlation coefficient with 房价 ranks highest,
# and draw the heat map of the correlations among those 10 features.
k = 10
cols = corrmat.nlargest(k, "房价")["房价"].index
# Take the coefficients straight from corrmat instead of recomputing them with
# np.corrcoef on the raw values: the numbers shown then match the ones used
# for the ranking above, and a missing value in one of the selected columns
# no longer turns whole rows/columns of the heat map into NaN.
cm = corrmat.loc[cols, cols].values
plt.figure(figsize=(16, 8), dpi=600)
hm = seaborn.heatmap(
    cm,
    annot=True,  # print each coefficient inside its cell
    square=True,
    fmt=".2f",
    annot_kws={"size": 10},
    yticklabels=cols.values,
    xticklabels=cols.values,
    cmap="YlGnBu",
)