from sklearn.preprocessing import LabelEncoder
import numpy as np
import pandas as pd
# Load the housing table from the Excel workbook in the working directory.
data = pd.read_excel('clean.xlsx')

# Replace each of these columns with integer codes produced by LabelEncoder.
# A fresh encoder is created per column, so the codes of different columns
# are independent of each other.
for col in ['房屋朝向', '所属小区', '装修程度']:
    le = LabelEncoder()
    # fit_transform learns the distinct labels and encodes the column in a
    # single call (equivalent to fitting on the unique values and then
    # transforming the full column).
    codes = le.fit_transform(data[col].values)
    # Print the learned labels; a label's position in this list is its code.
    print(list(le.classes_))
    # Assign the array directly: this is positional, so it does not depend on
    # the frame's index matching a freshly built 0..n-1 index.
    data[col] = codes
# Columns copied from `data` into the result without modification.
passthrough_columns = [
    '单价',
    '周围公交线路(0.5km)',
    '周围学校数量(1km)',
    '容积率',
    '建造年限',
    '物业费',
    '绿化率',
    '距商圈距离(km)',
    '距地铁站距离(km)',
    '距垃圾站距离(km)',
    '面积',
]

# Columns expanded into indicator (dummy) columns, one per distinct value,
# each named "<column>_<value>".
dummy_columns = ['房屋朝向', '所属小区', '装修程度', '配套电梯']

indicator_frames = [
    pd.get_dummies(data[name], prefix=name) for name in dummy_columns
]

# Every piece is derived from `data` and shares its index, so a column-wise
# concatenation lines the rows up: pass-through columns first, then the
# indicator columns in the order listed above.
df = pd.concat([data[passthrough_columns], *indicator_frames], axis=1)