比赛提供的数据集是某个地区的房屋数据,要求运用机器学习的知识给出合理的房价(即目标列“月租金”)预测。
首先导入所需要的包
import pandas as pd
import numpy as np
import xgboost as xgb
import seaborn as sns
# Apply seaborn's 'whitegrid' style and list SimHei first among the sans-serif fonts
# (presumably so the Chinese column names render in plots - confirm the font is installed).
sns.set_style('whitegrid',{'font.sans-serif':['simhei','Arial']})
import matplotlib
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn import preprocessing
from sklearn import linear_model, svm, gaussian_process
from sklearn.ensemble import RandomForestRegressor
from sklearn.cross_validation import train_test_split
import lightgbm as lgb
from sklearn import cross_validation, metrics
from sklearn.linear_model import LinearRegression
c:\program files\python36\lib\site-packages\sklearn\cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
"This module will be removed in 0.20.", DeprecationWarning)
# Read the training and test CSV files from the working directory, then
# preview the first rows of the training frame.
train_data, test_data = (
    pd.read_csv(csv_path) for csv_path in ('./train.csv', './test.csv')
)
train_data.head()
|
时间 |
小区名 |
小区房屋出租数量 |
楼层 |
总楼层 |
房屋面积 |
房屋朝向 |
居住状态 |
卧室数量 |
厅的数量 |
卫的数量 |
出租方式 |
区 |
位置 |
地铁线路 |
地铁站点 |
距离 |
装修情况 |
月租金 |
0 |
1 |
3072 |
0.128906 |
2 |
0.236364 |
0.008628 |
东南 |
NaN |
1 |
1 |
1 |
NaN |
11.0 |
118.0 |
2.0 |
40.0 |
0.764167 |
NaN |
5.602716 |
1 |
1 |
3152 |
0.132812 |
1 |
0.381818 |
0.017046 |
东 |
NaN |
1 |
0 |
0 |
NaN |
10.0 |
100.0 |
4.0 |
58.0 |
0.709167 |
NaN |
16.977929 |
2 |
1 |
5575 |
0.042969 |
0 |
0.290909 |
0.010593 |
东南 |
NaN |
2 |
1 |
2 |
NaN |
12.0 |
130.0 |
5.0 |
37.0 |
0.572500 |
NaN |
8.998302 |
3 |
1 |
3103 |
0.085938 |
2 |
0.581818 |
0.019199 |
南 |
NaN |
3 |
2 |
2 |
NaN |
7.0 |
90.0 |
2.0 |
63.0 |
0.658333 |
NaN |
5.602716 |
4 |
1 |
5182 |
0.214844 |
0 |
0.545455 |
0.010427 |
东北 |
NaN |
2 |
1 |
1 |
NaN |
3.0 |
31.0 |
NaN |
NaN |
NaN |
NaN |
7.300509 |
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
train_data.describe()
|
时间 |
小区名 |
小区房屋出租数量 |
楼层 |
总楼层 |
房屋面积 |
居住状态 |
卧室数量 |
厅的数量 |
卫的数量 |
出租方式 |
区 |
位置 |
地铁线路 |
地铁站点 |
距离 |
装修情况 |
月租金 |
count |
196539.000000 |
196539.000000 |
195538.000000 |
196539.000000 |
196539.000000 |
196539.000000 |
20138.000000 |
196539.000000 |
196539.000000 |
196539.000000 |
24230.000000 |
196508.000000 |
196508.000000 |
91778.000000 |
91778.000000 |
91778.000000 |
18492.000000 |
196539.000000 |
mean |
2.115229 |
3224.116562 |
0.124151 |
0.955449 |
0.408711 |
0.013139 |
2.725196 |
2.236635 |
1.299625 |
1.223818 |
0.900289 |
7.905139 |
67.945982 |
3.284850 |
57.493735 |
0.551202 |
3.589228 |
7.949313 |
std |
0.786980 |
2023.073726 |
0.133299 |
0.851511 |
0.183100 |
0.008104 |
0.667763 |
0.896961 |
0.613169 |
0.487234 |
0.299621 |
4.025696 |
43.522394 |
1.477147 |
35.191414 |
0.247268 |
1.996912 |
6.310609 |
min |
1.000000 |
0.000000 |
0.007812 |
0.000000 |
0.000000 |
0.000000 |
1.000000 |
0.000000 |
0.000000 |
0.000000 |
0.000000 |
0.000000 |
0.000000 |
1.000000 |
1.000000 |
0.001667 |
1.000000 |
0.000000 |
25% |
1.000000 |
1388.000000 |
0.039062 |
0.000000 |
0.290909 |
0.009268 |
3.000000 |
2.000000 |
1.000000 |
1.000000 |
1.000000 |
4.000000 |
33.000000 |
2.000000 |
23.000000 |
0.356667 |
2.000000 |
4.923599 |
50% |
2.000000 |
3086.000000 |
0.082031 |
1.000000 |
0.418182 |
0.012910 |
3.000000 |
2.000000 |
1.000000 |
1.000000 |
1.000000 |
9.000000 |
61.000000 |
4.000000 |
59.000000 |
0.554167 |
2.000000 |
6.621392 |
75% |
3.000000 |
5199.000000 |
0.160156 |
2.000000 |
0.563636 |
0.014896 |
3.000000 |
3.000000 |
2.000000 |
1.000000 |
1.000000 |
11.000000 |
103.000000 |
5.000000 |
87.000000 |
0.745833 |
6.000000 |
8.998302 |
max |
3.000000 |
6627.000000 |
1.000000 |
2.000000 |
1.000000 |
1.000000 |
3.000000 |
11.000000 |
8.000000 |
8.000000 |
1.000000 |
14.000000 |
152.000000 |
5.000000 |
119.000000 |
1.000000 |
6.000000 |
100.000000 |
# Print each column's dtype and non-null count to see where values are missing.
train_data.info()
RangeIndex: 196539 entries, 0 to 196538
Data columns (total 19 columns):
时间 196539 non-null int64
小区名 196539 non-null int64
小区房屋出租数量 195538 non-null float64
楼层 196539 non-null int64
总楼层 196539 non-null float64
房屋面积 196539 non-null float64
房屋朝向 196539 non-null object
居住状态 20138 non-null float64
卧室数量 196539 non-null int64
厅的数量 196539 non-null int64
卫的数量 196539 non-null int64
出租方式 24230 non-null float64
区 196508 non-null float64
位置 196508 non-null float64
地铁线路 91778 non-null float64
地铁站点 91778 non-null float64
距离 91778 non-null float64
装修情况 18492 non-null float64
月租金 196539 non-null float64
dtypes: float64(12), int64(6), object(1)
memory usage: 28.5+ MB
# Pairwise correlation of the numeric columns, drawn as a heatmap.
# The text column '房屋朝向' is excluded explicitly: older pandas dropped
# non-numeric columns inside corr() silently, while pandas >= 2.0 raises on
# them. Selecting numeric columns first gives the same matrix on both.
corrmat = train_data.select_dtypes(include=[np.number]).corr()
f, ax = plt.subplots(figsize=(20, 9))
# vmax=0.8 caps the colour scale; square=True draws square cells.
sns.heatmap(corrmat, vmax=0.8, square=True)
# Baseline model: ordinary least squares on the raw columns, with missing
# values encoded as 0.
train_data = pd.read_csv('./train.csv').fillna(0)
test_data = pd.read_csv('./test.csv').fillna(0)

# Target as an (n, 1) column vector.
result = train_data['月租金'].values.reshape(-1, 1)

# Build both matrices from one explicit feature list so that
#   * the target '月租金' is not fed to the model as a feature (leakage), and
#   * train and test columns line up one-to-one (the test file carries an
#     'id' column instead of the target, which previously shifted every
#     test column by one position).
# The text column '房屋朝向' is left out because it is not numeric.
feature_columns = [
    column for column in train_data.columns
    if column not in ('月租金', '房屋朝向')
]
train_data = train_data[feature_columns].values
test_data = test_data[feature_columns].values

lr = LinearRegression()
lr.fit(train_data, result)
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)
# Show the (rows, columns) shape of the training matrix passed to the model.
train_data.shape
(196539, 18)
# Predict on the test matrix with the fitted linear model and display the result.
prediction=lr.predict(test_data)
prediction
array([[ 6.00000000e+00],
[-3.05193784e-11],
[-1.35930288e-11],
...,
[-5.85045654e-11],
[-1.61906537e-10],
[-1.27143911e-10]])
# Columns removed before modelling. '月租金' is only removed from the training
# frame; the test frame additionally loses its 'id' column.
Del_columns = ['时间', '小区名', '小区房屋出租数量', '居住状态', '月租金', '装修情况']
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')
train_data = train_data.drop(Del_columns, axis=1)
test_data = test_data.drop(
    [column for column in Del_columns if column != '月租金'] + ['id'],
    axis=1,
)
train_data.head()
|
楼层 |
总楼层 |
房屋面积 |
房屋朝向 |
卧室数量 |
厅的数量 |
卫的数量 |
出租方式 |
区 |
位置 |
地铁线路 |
地铁站点 |
距离 |
0 |
2 |
0.236364 |
0.008628 |
东南 |
1 |
1 |
1 |
NaN |
11.0 |
118.0 |
2.0 |
40.0 |
0.764167 |
1 |
1 |
0.381818 |
0.017046 |
东 |
1 |
0 |
0 |
NaN |
10.0 |
100.0 |
4.0 |
58.0 |
0.709167 |
2 |
0 |
0.290909 |
0.010593 |
东南 |
2 |
1 |
2 |
NaN |
12.0 |
130.0 |
5.0 |
37.0 |
0.572500 |
3 |
2 |
0.581818 |
0.019199 |
南 |
3 |
2 |
2 |
NaN |
7.0 |
90.0 |
2.0 |
63.0 |
0.658333 |
4 |
0 |
0.545455 |
0.010427 |
东北 |
2 |
1 |
1 |
NaN |
3.0 |
31.0 |
NaN |
NaN |
NaN |
# Preview the test frame after the listed columns and 'id' were removed.
test_data.head()
|
楼层 |
总楼层 |
房屋面积 |
房屋朝向 |
卧室数量 |
厅的数量 |
卫的数量 |
出租方式 |
区 |
位置 |
地铁线路 |
地铁站点 |
距离 |
0 |
1 |
0.600000 |
0.007117 |
东 |
2 |
1 |
1 |
1.0 |
10.0 |
5.0 |
NaN |
NaN |
NaN |
1 |
1 |
0.472727 |
0.007448 |
东 |
2 |
1 |
1 |
NaN |
3.0 |
0.0 |
NaN |
NaN |
NaN |
2 |
2 |
0.709091 |
0.014068 |
东南 |
3 |
2 |
2 |
NaN |
10.0 |
9.0 |
4.0 |
74.0 |
0.400833 |
3 |
0 |
0.090909 |
0.008937 |
南 |
2 |
1 |
1 |
NaN |
6.0 |
96.0 |
5.0 |
17.0 |
0.384167 |
4 |
1 |
0.218182 |
0.008606 |
东南 |
2 |
1 |
1 |
NaN |
6.0 |
61.0 |
3.0 |
114.0 |
0.598333 |
def split_map(str):
    """Split a string into its space-separated tokens.

    Leading and trailing whitespace is stripped first; the remainder is split
    on single spaces, so consecutive inner spaces produce empty-string tokens.

    Args:
        str: The text to split. The name shadows the builtin but is kept so
            existing callers are unaffected.

    Returns:
        A list with the resulting tokens, in order.
    """
    trimmed = str.strip()
    return trimmed.split(' ')
def check_bool(arr, str):
    """Report, item by item, whether ``str`` is contained in each element.

    Args:
        arr: An iterable whose elements support the ``in`` operator
            (for example lists of tokens, or strings).
        str: The value to look for. The name shadows the builtin but is kept
            so existing callers are unaffected.

    Returns:
        A list of booleans, one per element of ``arr``, in the same order.
    """
    return [str in item for item in arr]
def processData(data):
    """Expand the '房屋朝向' (orientation) column into 0/1 direction columns.

    Each orientation value is treated as a whitespace-separated list of
    direction tokens. One indicator column is added per direction and holds 1
    on the rows whose token list contains exactly that direction, 0
    otherwise. The original text column is then removed.

    Fixes relative to the previous version: the '东南'/'东北' flags and the
    '西南'/'西北' flags were written to each other's columns; every column now
    receives the flag of its own direction. Non-string values (e.g. NaN) are
    treated as carrying no direction instead of raising.

    Args:
        data: DataFrame containing a '房屋朝向' column. Modified in place.

    Returns:
        The same DataFrame object, with the eight direction columns appended
        and '房屋朝向' deleted.
    """
    directions = ['东', '南', '西', '北', '东南', '东北', '西南', '西北']
    # Tokenise once. Membership is tested against whole tokens, so a value of
    # '东南' does not also count as '东' or '南'.
    token_lists = [
        text.split() if isinstance(text, str) else []
        for text in data['房屋朝向']
    ]
    for direction in directions:
        # Positional list assignment keeps rows aligned regardless of index.
        data[direction] = [int(direction in tokens) for tokens in token_lists]
    del data['房屋朝向']
    return data
# Replace the '房屋朝向' column with 0/1 direction columns in both frames,
# then preview the training frame.
train_data=processData(train_data)
test_data=processData(test_data)
train_data.head()
|
楼层 |
总楼层 |
房屋面积 |
卧室数量 |
厅的数量 |
卫的数量 |
出租方式 |
区 |
位置 |
地铁线路 |
地铁站点 |
距离 |
东 |
南 |
西 |
北 |
东南 |
东北 |
西南 |
西北 |
0 |
2 |
0.236364 |
0.008628 |
1 |
1 |
1 |
NaN |
11.0 |
118.0 |
2.0 |
40.0 |
0.764167 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
1 |
1 |
0.381818 |
0.017046 |
1 |
0 |
0 |
NaN |
10.0 |
100.0 |
4.0 |
58.0 |
0.709167 |
1 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
2 |
0 |
0.290909 |
0.010593 |
2 |
1 |
2 |
NaN |
12.0 |
130.0 |
5.0 |
37.0 |
0.572500 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
3 |
2 |
0.581818 |
0.019199 |
3 |
2 |
2 |
NaN |
7.0 |
90.0 |
2.0 |
63.0 |
0.658333 |
0 |
1 |
0 |
0 |
0 |
0 |
0 |
0 |
4 |
0 |
0.545455 |
0.010427 |
2 |
1 |
1 |
NaN |
3.0 |
31.0 |
NaN |
NaN |
NaN |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
# Preview the test frame after the orientation column was expanded.
test_data.head()
|
楼层 |
总楼层 |
房屋面积 |
卧室数量 |
厅的数量 |
卫的数量 |
出租方式 |
区 |
位置 |
地铁线路 |
地铁站点 |
距离 |
东 |
南 |
西 |
北 |
东南 |
东北 |
西南 |
西北 |
0 |
1 |
0.600000 |
0.007117 |
2 |
1 |
1 |
1.0 |
10.0 |
5.0 |
NaN |
NaN |
NaN |
1 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
1 |
1 |
0.472727 |
0.007448 |
2 |
1 |
1 |
NaN |
3.0 |
0.0 |
NaN |
NaN |
NaN |
1 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
2 |
2 |
0.709091 |
0.014068 |
3 |
2 |
2 |
NaN |
10.0 |
9.0 |
4.0 |
74.0 |
0.400833 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
3 |
0 |
0.090909 |
0.008937 |
2 |
1 |
1 |
NaN |
6.0 |
96.0 |
5.0 |
17.0 |
0.384167 |
0 |
1 |
0 |
0 |
0 |
0 |
0 |
0 |
4 |
1 |
0.218182 |
0.008606 |
2 |
1 |
1 |
NaN |
6.0 |
61.0 |
3.0 |
114.0 |
0.598333 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
# Encode missing values as 0 and convert both frames to NumPy arrays.
train_data = train_data.fillna(0).values
# Fill the *test* frame. Previously the filled training frame was assigned
# here, so every later "test" prediction was made on training rows.
test_data = test_data.fillna(0).values
# The target column was deleted from train_data earlier, so read it again
# from the CSV file.
train_result = pd.read_csv('train.csv')['月租金'].values
用 XGBoost 模型进行训练
# Hold out 10% of the training rows as a validation set (fixed random seed).
# NOTE(review): sklearn.cross_validation is deprecated since 0.18 and, per the
# warning printed at import time, will be removed in 0.20 in favour of
# sklearn.model_selection.
X_train, X_test, y_train, y_test = cross_validation.train_test_split(train_data, train_result, test_size=0.1,
random_state=2333)
# Wrap the arrays in XGBoost DMatrix objects; the test matrix carries no labels.
xgb_val = xgb.DMatrix(X_test, label=y_test)
xgb_train = xgb.DMatrix(X_train, label=y_train)
xgb_test = xgb.DMatrix(test_data)
# Booster parameters passed to xgb.train.
params = {
'booster': 'gbtree',
'objective': 'reg:linear',
# NOTE(review): 'n_estimators' looks like a scikit-learn-wrapper argument; the
# number of boosting rounds for xgb.train below comes from num_rounds -
# confirm whether this key has any effect.
'n_estimators': 2000,
'gamma': 0.2,
'max_depth': 10,
"reg_alpha": 3,
'lambda': 5,
'subsample': 0.9,
'colsample_bytree': 0.6,
'colsample_bylevel': 0.7,
'min_child_weight': 7,
'silent': 1,
'eta': 0.05,
'seed': 2017,
}
# The parameters are handed over as a list of (name, value) pairs.
plst = list(params.items())
# Upper bound on boosting rounds; early stopping may end training sooner.
num_rounds = 10000
# Datasets evaluated during training; the log output shows 'val-rmse' is the
# metric used for early stopping.
watchlist = [(xgb_train, 'train'), (xgb_val, 'val')]
# Train the model: stop once the validation metric has not improved for 300
# rounds, and print the evaluation results every 50 rounds.
model = xgb.train(plst, xgb_train, num_rounds, watchlist, early_stopping_rounds=300, verbose_eval=50, )
# Persist the trained booster to disk.
model.save_model('xgb.model')
print("模型训练完成")
print("训练完毕,开始预测")
# Predict using only the trees up to the best iteration found by early stopping.
test_result = model.predict(xgb_test, ntree_limit=model.best_ntree_limit)
# Write the predictions to a CSV file (the DataFrame index is written as well).
data_df = pd.DataFrame(test_result)
filename = 'result_xgb.csv'
data_df.to_csv(filename, encoding='utf-8')
[0] train-rmse:9.3703 val-rmse:9.49998
Multiple eval metrics have been passed: 'val-rmse' will be used for early stopping.
Will train until val-rmse hasn't improved in 300 rounds.
[50] train-rmse:2.77386 val-rmse:2.95125
[100] train-rmse:2.22017 val-rmse:2.42123
[150] train-rmse:2.05228 val-rmse:2.26014
[200] train-rmse:1.95506 val-rmse:2.16923
[250] train-rmse:1.87808 val-rmse:2.09895
[300] train-rmse:1.81004 val-rmse:2.03828
[350] train-rmse:1.75105 val-rmse:1.98529
[400] train-rmse:1.69493 val-rmse:1.93967
[450] train-rmse:1.64521 val-rmse:1.90085
[500] train-rmse:1.59184 val-rmse:1.85845
[550] train-rmse:1.54979 val-rmse:1.82564
[600] train-rmse:1.51416 val-rmse:1.79824
[650] train-rmse:1.476 val-rmse:1.76895
[700] train-rmse:1.44457 val-rmse:1.74357
[750] train-rmse:1.41372 val-rmse:1.72097
[800] train-rmse:1.3867 val-rmse:1.69965
[850] train-rmse:1.36016 val-rmse:1.67913
[900] train-rmse:1.3367 val-rmse:1.6619
[950] train-rmse:1.31308 val-rmse:1.64355
[1000] train-rmse:1.2914 val-rmse:1.62736
[1050] train-rmse:1.27203 val-rmse:1.61333
[1100] train-rmse:1.2507 val-rmse:1.59796
[1150] train-rmse:1.23164 val-rmse:1.58435
[1200] train-rmse:1.21557 val-rmse:1.57225
[1250] train-rmse:1.20111 val-rmse:1.56099
[1300] train-rmse:1.18301 val-rmse:1.54762
[1350] train-rmse:1.1699 val-rmse:1.53814
[1400] train-rmse:1.15637 val-rmse:1.52793
[1450] train-rmse:1.14323 val-rmse:1.51842
[1500] train-rmse:1.13196 val-rmse:1.51052
[1550] train-rmse:1.11936 val-rmse:1.50133
[1600] train-rmse:1.10928 val-rmse:1.49404
[1650] train-rmse:1.09593 val-rmse:1.48425
[1700] train-rmse:1.08563 val-rmse:1.4771
[1750] train-rmse:1.07649 val-rmse:1.47056
[1800] train-rmse:1.06619 val-rmse:1.46278
[1850] train-rmse:1.05751 val-rmse:1.45715
[1900] train-rmse:1.04786 val-rmse:1.45038
[1950] train-rmse:1.03764 val-rmse:1.44344
[2000] train-rmse:1.02981 val-rmse:1.43775
[2050] train-rmse:1.02199 val-rmse:1.43245
[2100] train-rmse:1.01467 val-rmse:1.42699
[2150] train-rmse:1.00615 val-rmse:1.42124
[2200] train-rmse:0.997202 val-rmse:1.41497
[2250] train-rmse:0.99015 val-rmse:1.41046
[2300] train-rmse:0.98291 val-rmse:1.40615
[2350] train-rmse:0.976352 val-rmse:1.40207
[2400] train-rmse:0.969466 val-rmse:1.39734
[2450] train-rmse:0.964016 val-rmse:1.39344
[2500] train-rmse:0.958046 val-rmse:1.38977
[2550] train-rmse:0.952188 val-rmse:1.38579
[2600] train-rmse:0.945662 val-rmse:1.38188
[2650] train-rmse:0.939815 val-rmse:1.37778
[2700] train-rmse:0.934951 val-rmse:1.37506
[2750] train-rmse:0.92956 val-rmse:1.37186
[2800] train-rmse:0.925032 val-rmse:1.36926
[2850] train-rmse:0.920306 val-rmse:1.36628
[2900] train-rmse:0.915933 val-rmse:1.36362
[2950] train-rmse:0.912063 val-rmse:1.36091
[3000] train-rmse:0.907578 val-rmse:1.3581
[3050] train-rmse:0.902738 val-rmse:1.35514
[3100] train-rmse:0.899267 val-rmse:1.35305
[3150] train-rmse:0.895539 val-rmse:1.35101
[3200] train-rmse:0.892583 val-rmse:1.34929
[3250] train-rmse:0.888493 val-rmse:1.3464
[3300] train-rmse:0.885463 val-rmse:1.34454
[3350] train-rmse:0.882028 val-rmse:1.34274
[3400] train-rmse:0.87818 val-rmse:1.34027
[3450] train-rmse:0.875323 val-rmse:1.33839
[3500] train-rmse:0.871904 val-rmse:1.33632
[3550] train-rmse:0.868129 val-rmse:1.33422
[3600] train-rmse:0.865097 val-rmse:1.33224
[3650] train-rmse:0.862143 val-rmse:1.33029
[3700] train-rmse:0.8593 val-rmse:1.32906
[3750] train-rmse:0.85648 val-rmse:1.32757
[3800] train-rmse:0.854131 val-rmse:1.3263
[3850] train-rmse:0.851365 val-rmse:1.32452
[3900] train-rmse:0.848457 val-rmse:1.32238
[3950] train-rmse:0.845943 val-rmse:1.32112
[4000] train-rmse:0.843617 val-rmse:1.31984
[4050] train-rmse:0.84074 val-rmse:1.3184
[4100] train-rmse:0.838503 val-rmse:1.31692
[4150] train-rmse:0.836258 val-rmse:1.31524
[4200] train-rmse:0.83361 val-rmse:1.31377
[4250] train-rmse:0.831158 val-rmse:1.31241
[4300] train-rmse:0.828668 val-rmse:1.31086
[4350] train-rmse:0.82635 val-rmse:1.30916
[4400] train-rmse:0.824288 val-rmse:1.30784
[4450] train-rmse:0.822547 val-rmse:1.30679
[4500] train-rmse:0.820341 val-rmse:1.3061
[4550] train-rmse:0.818466 val-rmse:1.30479
[4600] train-rmse:0.816483 val-rmse:1.30372
[4650] train-rmse:0.814547 val-rmse:1.30289
[4700] train-rmse:0.812584 val-rmse:1.30213
[4750] train-rmse:0.810988 val-rmse:1.30125
[4800] train-rmse:0.809294 val-rmse:1.30042
[4850] train-rmse:0.807515 val-rmse:1.29971
[4900] train-rmse:0.805787 val-rmse:1.29873
[4950] train-rmse:0.804336 val-rmse:1.29787
[5000] train-rmse:0.802841 val-rmse:1.29688
[5050] train-rmse:0.801239 val-rmse:1.29602
[5100] train-rmse:0.799495 val-rmse:1.29525
[5150] train-rmse:0.797696 val-rmse:1.29396
[5200] train-rmse:0.796141 val-rmse:1.29346
[5250] train-rmse:0.794911 val-rmse:1.29268
[5300] train-rmse:0.793145 val-rmse:1.29126
[5350] train-rmse:0.791934 val-rmse:1.29083
[5400] train-rmse:0.790233 val-rmse:1.28989
[5450] train-rmse:0.788694 val-rmse:1.28888
[5500] train-rmse:0.786967 val-rmse:1.28807
[5550] train-rmse:0.785497 val-rmse:1.28732
[5600] train-rmse:0.784078 val-rmse:1.28612
[5650] train-rmse:0.782551 val-rmse:1.28536
[5700] train-rmse:0.781073 val-rmse:1.28481
[5750] train-rmse:0.779708 val-rmse:1.28382
[5800] train-rmse:0.778309 val-rmse:1.28302
[5850] train-rmse:0.776918 val-rmse:1.28268
[5900] train-rmse:0.775676 val-rmse:1.28189
[5950] train-rmse:0.774513 val-rmse:1.28112
[6000] train-rmse:0.773367 val-rmse:1.28045
[6050] train-rmse:0.772218 val-rmse:1.27994
[6100] train-rmse:0.771067 val-rmse:1.27933
[6150] train-rmse:0.7696 val-rmse:1.27846
[6200] train-rmse:0.768351 val-rmse:1.27754
[6250] train-rmse:0.767365 val-rmse:1.27706
[6300] train-rmse:0.766297 val-rmse:1.27632
[6350] train-rmse:0.765223 val-rmse:1.27568
[6400] train-rmse:0.764237 val-rmse:1.27524
[6450] train-rmse:0.762684 val-rmse:1.2743
[6500] train-rmse:0.761282 val-rmse:1.27319
[6550] train-rmse:0.760169 val-rmse:1.27264
[6600] train-rmse:0.759052 val-rmse:1.27181
[6650] train-rmse:0.75798 val-rmse:1.2713
[6700] train-rmse:0.756919 val-rmse:1.27098
[6750] train-rmse:0.756039 val-rmse:1.27034
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
in ()
34 # 训练模型并保存
35 # early_stopping_rounds 当设置的迭代次数较大时,early_stopping_rounds 可在一定的迭代次数内准确率没有提升就停止训练
---> 36 model = xgb.train(plst, xgb_train, num_rounds, watchlist, early_stopping_rounds=300, verbose_eval=50, )
37 model.save_model('xgb.model') # 用于存储训练出的模型
38 print("模型训练完成")
c:\program files\python36\lib\site-packages\xgboost\training.py in train(params, dtrain, num_boost_round, evals, obj, feval, maximize, early_stopping_rounds, evals_result, verbose_eval, xgb_model, callbacks, learning_rates)
202 evals=evals,
203 obj=obj, feval=feval,
--> 204 xgb_model=xgb_model, callbacks=callbacks)
205
206
c:\program files\python36\lib\site-packages\xgboost\training.py in _train_internal(params, dtrain, num_boost_round, evals, obj, feval, xgb_model, callbacks)
72 # Skip the first update if it is a recovery step.
73 if version % 2 == 0:
---> 74 bst.update(dtrain, i, obj)
75 bst.save_rabit_checkpoint()
76 version += 1
c:\program files\python36\lib\site-packages\xgboost\core.py in update(self, dtrain, iteration, fobj)
1019 if fobj is None:
1020 _check_call(_LIB.XGBoosterUpdateOneIter(self.handle, ctypes.c_int(iteration),
-> 1021 dtrain.handle))
1022 else:
1023 pred = self.predict(dtrain)
KeyboardInterrupt:
- 2月12日 补充
在房屋朝向方面,可以运用更简单的方法来进行处理: