XGBoost是梯度提升决策树(Gradient Boosting Decision Tree,GBDT)的一种实现。
GBDT model cycle:
![GBDT model cycle](https://i.imgur.com/e7MIgXk.png)
import pandas as pd
data= pd.read_csv(r'G:\kaggle\housePrice\train.csv')
data.head()
Id | MSSubClass | MSZoning | LotFrontage | LotArea | Street | Alley | LotShape | LandContour | Utilities | ... | PoolArea | PoolQC | Fence | MiscFeature | MiscVal | MoSold | YrSold | SaleType | SaleCondition | SalePrice | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 60 | RL | 65.0 | 8450 | Pave | NaN | Reg | Lvl | AllPub | ... | 0 | NaN | NaN | NaN | 0 | 2 | 2008 | WD | Normal | 208500 |
1 | 2 | 20 | RL | 80.0 | 9600 | Pave | NaN | Reg | Lvl | AllPub | ... | 0 | NaN | NaN | NaN | 0 | 5 | 2007 | WD | Normal | 181500 |
2 | 3 | 60 | RL | 68.0 | 11250 | Pave | NaN | IR1 | Lvl | AllPub | ... | 0 | NaN | NaN | NaN | 0 | 9 | 2008 | WD | Normal | 223500 |
3 | 4 | 70 | RL | 60.0 | 9550 | Pave | NaN | IR1 | Lvl | AllPub | ... | 0 | NaN | NaN | NaN | 0 | 2 | 2006 | WD | Abnorml | 140000 |
4 | 5 | 60 | RL | 84.0 | 14260 | Pave | NaN | IR1 | Lvl | AllPub | ... | 0 | NaN | NaN | NaN | 0 | 12 | 2008 | WD | Normal | 250000 |
5 rows × 81 columns
data.describe()
Id | MSSubClass | LotFrontage | LotArea | OverallQual | OverallCond | YearBuilt | YearRemodAdd | MasVnrArea | BsmtFinSF1 | ... | WoodDeckSF | OpenPorchSF | EnclosedPorch | 3SsnPorch | ScreenPorch | PoolArea | MiscVal | MoSold | YrSold | SalePrice | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 1460.000000 | 1460.000000 | 1201.000000 | 1460.000000 | 1460.000000 | 1460.000000 | 1460.000000 | 1460.000000 | 1452.000000 | 1460.000000 | ... | 1460.000000 | 1460.000000 | 1460.000000 | 1460.000000 | 1460.000000 | 1460.000000 | 1460.000000 | 1460.000000 | 1460.000000 | 1460.000000 |
mean | 730.500000 | 56.897260 | 70.049958 | 10516.828082 | 6.099315 | 5.575342 | 1971.267808 | 1984.865753 | 103.685262 | 443.639726 | ... | 94.244521 | 46.660274 | 21.954110 | 3.409589 | 15.060959 | 2.758904 | 43.489041 | 6.321918 | 2007.815753 | 180921.195890 |
std | 421.610009 | 42.300571 | 24.284752 | 9981.264932 | 1.382997 | 1.112799 | 30.202904 | 20.645407 | 181.066207 | 456.098091 | ... | 125.338794 | 66.256028 | 61.119149 | 29.317331 | 55.757415 | 40.177307 | 496.123024 | 2.703626 | 1.328095 | 79442.502883 |
min | 1.000000 | 20.000000 | 21.000000 | 1300.000000 | 1.000000 | 1.000000 | 1872.000000 | 1950.000000 | 0.000000 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 2006.000000 | 34900.000000 |
25% | 365.750000 | 20.000000 | 59.000000 | 7553.500000 | 5.000000 | 5.000000 | 1954.000000 | 1967.000000 | 0.000000 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 5.000000 | 2007.000000 | 129975.000000 |
50% | 730.500000 | 50.000000 | 69.000000 | 9478.500000 | 6.000000 | 5.000000 | 1973.000000 | 1994.000000 | 0.000000 | 383.500000 | ... | 0.000000 | 25.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 6.000000 | 2008.000000 | 163000.000000 |
75% | 1095.250000 | 70.000000 | 80.000000 | 11601.500000 | 7.000000 | 6.000000 | 2000.000000 | 2004.000000 | 166.000000 | 712.250000 | ... | 168.000000 | 68.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 8.000000 | 2009.000000 | 214000.000000 |
max | 1460.000000 | 190.000000 | 313.000000 | 215245.000000 | 10.000000 | 9.000000 | 2010.000000 | 2010.000000 | 1600.000000 | 5644.000000 | ... | 857.000000 | 547.000000 | 552.000000 | 508.000000 | 480.000000 | 738.000000 | 15500.000000 | 12.000000 | 2010.000000 | 755000.000000 |
8 rows × 38 columns
data.SalePrice.isnull().sum() #SalePrice列没有缺失值
0
# Drop every row whose target value (SalePrice) is missing.
# The null count printed just before this is 0, so no rows are actually removed here.
data.dropna(axis=0, subset=['SalePrice'], inplace= True)
# Many pandas methods accept an `inplace` parameter:
#   inplace=True  -> modify the original object directly
#   inplace=False -> (the default) build a new, modified object and leave the original unchanged
# subset: the columns that are inspected for missing values
data.describe()
Id | MSSubClass | LotFrontage | LotArea | OverallQual | OverallCond | YearBuilt | YearRemodAdd | MasVnrArea | BsmtFinSF1 | ... | WoodDeckSF | OpenPorchSF | EnclosedPorch | 3SsnPorch | ScreenPorch | PoolArea | MiscVal | MoSold | YrSold | SalePrice | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 1460.000000 | 1460.000000 | 1201.000000 | 1460.000000 | 1460.000000 | 1460.000000 | 1460.000000 | 1460.000000 | 1452.000000 | 1460.000000 | ... | 1460.000000 | 1460.000000 | 1460.000000 | 1460.000000 | 1460.000000 | 1460.000000 | 1460.000000 | 1460.000000 | 1460.000000 | 1460.000000 |
mean | 730.500000 | 56.897260 | 70.049958 | 10516.828082 | 6.099315 | 5.575342 | 1971.267808 | 1984.865753 | 103.685262 | 443.639726 | ... | 94.244521 | 46.660274 | 21.954110 | 3.409589 | 15.060959 | 2.758904 | 43.489041 | 6.321918 | 2007.815753 | 180921.195890 |
std | 421.610009 | 42.300571 | 24.284752 | 9981.264932 | 1.382997 | 1.112799 | 30.202904 | 20.645407 | 181.066207 | 456.098091 | ... | 125.338794 | 66.256028 | 61.119149 | 29.317331 | 55.757415 | 40.177307 | 496.123024 | 2.703626 | 1.328095 | 79442.502883 |
min | 1.000000 | 20.000000 | 21.000000 | 1300.000000 | 1.000000 | 1.000000 | 1872.000000 | 1950.000000 | 0.000000 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 2006.000000 | 34900.000000 |
25% | 365.750000 | 20.000000 | 59.000000 | 7553.500000 | 5.000000 | 5.000000 | 1954.000000 | 1967.000000 | 0.000000 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 5.000000 | 2007.000000 | 129975.000000 |
50% | 730.500000 | 50.000000 | 69.000000 | 9478.500000 | 6.000000 | 5.000000 | 1973.000000 | 1994.000000 | 0.000000 | 383.500000 | ... | 0.000000 | 25.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 6.000000 | 2008.000000 | 163000.000000 |
75% | 1095.250000 | 70.000000 | 80.000000 | 11601.500000 | 7.000000 | 6.000000 | 2000.000000 | 2004.000000 | 166.000000 | 712.250000 | ... | 168.000000 | 68.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 8.000000 | 2009.000000 | 214000.000000 |
max | 1460.000000 | 190.000000 | 313.000000 | 215245.000000 | 10.000000 | 9.000000 | 2010.000000 | 2010.000000 | 1600.000000 | 5644.000000 | ... | 857.000000 | 547.000000 | 552.000000 | 508.000000 | 480.000000 | 738.000000 | 15500.000000 | 12.000000 | 2010.000000 | 755000.000000 |
8 rows × 38 columns
# Target: the sale price to predict.
y = data['SalePrice']

# Features: every other column, with the non-numeric (object dtype) columns removed.
X = data.drop(['SalePrice'], axis=1).select_dtypes(exclude=['object'])

# Split BEFORE imputing, so the held-out rows never contribute to the
# statistics the imputer learns (fitting on the full matrix leaks test data).
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

# Missing values: SimpleImputer with its defaults (missing_values=np.NaN, strategy='mean').
from sklearn.impute import SimpleImputer
my_imputer = SimpleImputer()
X_train = my_imputer.fit_transform(X_train)  # learn the column means from the training rows only
X_test = my_imputer.transform(X_test)        # fill the held-out rows with those training means

# Full feature matrix imputed with the training means; kept under its original
# name for any later code that still refers to it.
X_with_imputed_values = my_imputer.transform(X)
#model
from xgboost import XGBRegressor # note: the package name is the all-lowercase `xgboost`
# Regressor built with no arguments; the repr echoed below lists the resulting hyper-parameters.
model= XGBRegressor()
model
XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
silent=True, subsample=1)
# fit: train on the training split; the value returned by fit() is bound to clf and used for scoring/prediction.
clf= model.fit(X_train, y_train)
# evaluation 1: score() on the held-out split
# (presumably R^2, the scikit-learn regressor convention -- confirm against the installed xgboost version)
clf.score(X_test, y_test)
0.842475147639184
# Predict the target for the held-out rows.
predictions= clf.predict(X_test)
# evaluation 2: mean absolute error between the true and the predicted values.
from sklearn.metrics import mean_absolute_error
# Arguments follow scikit-learn's documented order (y_true, y_pred).
# MAE is symmetric, so the value is the same as with the swapped order.
mean_absolute_error(y_test, predictions)
17306.50893088693
调两个重要的参数:n_estimators and early_stopping_rounds
n_estimators:
上面描述的modeling cycle的次数, 即总共迭代的次数,也即决策树的个数(如果选择的基学习器是决策树)
太小易underfitting
太大易overfitting
typical value range 100-1000, 这取决于下面讨论的learning rate。
early_stopping_rounds:
早停策略
early_stopping_rounds=5 #如果验证集eval_set的score在5轮内都没有提高(或者误差没有降低),则提前停止迭代,结束模型训练。
调参:防止过拟合
set a high value for n_estimators and then use early_stopping_rounds to find the optimal time to stop iterating是一个明智之举。
# Deliberately large n_estimators: the per-round validation log printed by fit()
# is then used to see where the validation error stops improving.
improved_model= XGBRegressor(booster='gbtree', # tree base learners (the default); booster='gblinear' selects linear base learners
n_estimators=700, # 700 base learners, i.e. 700 boosting rounds (watch the log from fit and keep adjusting)
silent=True, # do not print intermediate messages (the default)
learning_rate=0.1) # learning rate (step size); 0.1 is the default
improved_model.fit(X_train, y_train,
# early_stopping_rounds=10 is left disabled. Author's note: with a patience of 10 the
# run stopped with round 78 as the best one, yet the full 700-round log shows the
# validation rmse drifting slightly lower again later on.
# NOTE(review): a larger patience or a smaller learning_rate is the usual way to avoid
# stopping on such a short plateau -- confirm on this data.
# NOTE(review): eval_set reuses the test split, so any stopping point chosen from this
# log is tuned on the test data; a separate validation split would be cleaner.
eval_set=[(X_test, y_test)], # validation data: a list of (X, y) tuples, hence the extra parentheses
verbose= True) # print the validation metric every round (rmse in the log below)
[0] validation_0-rmse:171354
[1] validation_0-rmse:155035
[2] validation_0-rmse:140105
[3] validation_0-rmse:127156
[4] validation_0-rmse:115501
[5] validation_0-rmse:104982
[6] validation_0-rmse:95640.1
[7] validation_0-rmse:87385.7
[8] validation_0-rmse:79873.8
[9] validation_0-rmse:73282.3
[10] validation_0-rmse:67515.6
[11] validation_0-rmse:62389.5
[12] validation_0-rmse:57831.5
[13] validation_0-rmse:53889.4
[14] validation_0-rmse:50267.4
[15] validation_0-rmse:47156
[16] validation_0-rmse:44856.7
[17] validation_0-rmse:42583.1
[18] validation_0-rmse:40623.5
[19] validation_0-rmse:38994.8
[20] validation_0-rmse:37508.2
[21] validation_0-rmse:36328.1
[22] validation_0-rmse:35230.5
[23] validation_0-rmse:34482.7
[24] validation_0-rmse:33812.3
[25] validation_0-rmse:33117.6
[26] validation_0-rmse:32555.1
[27] validation_0-rmse:32110.1
[28] validation_0-rmse:31723.5
[29] validation_0-rmse:31453.6
[30] validation_0-rmse:31251.2
[31] validation_0-rmse:31114.1
[32] validation_0-rmse:30839.7
[33] validation_0-rmse:30671.4
[34] validation_0-rmse:30502.4
[35] validation_0-rmse:30366.8
[36] validation_0-rmse:30261.1
[37] validation_0-rmse:30134.2
[38] validation_0-rmse:30074.4
[39] validation_0-rmse:29955.6
[40] validation_0-rmse:29821.9
[41] validation_0-rmse:29768.3
[42] validation_0-rmse:29677.8
[43] validation_0-rmse:29747
[44] validation_0-rmse:29762.3
[45] validation_0-rmse:29687.5
[46] validation_0-rmse:29597.7
[47] validation_0-rmse:29572.5
[48] validation_0-rmse:29499.4
[49] validation_0-rmse:29490.6
[50] validation_0-rmse:29431.2
[51] validation_0-rmse:29456.1
[52] validation_0-rmse:29418.4
[53] validation_0-rmse:29429.7
[54] validation_0-rmse:29420.7
[55] validation_0-rmse:29352.5
[56] validation_0-rmse:29334.4
[57] validation_0-rmse:29327
[58] validation_0-rmse:29353.8
[59] validation_0-rmse:29313.8
[60] validation_0-rmse:29265
[61] validation_0-rmse:29251.9
[62] validation_0-rmse:29278.7
[63] validation_0-rmse:29232.1
[64] validation_0-rmse:29241.9
[65] validation_0-rmse:29225
[66] validation_0-rmse:29224.7
[67] validation_0-rmse:29276.3
[68] validation_0-rmse:29296.9
[69] validation_0-rmse:29268.3
[70] validation_0-rmse:29243.8
[71] validation_0-rmse:29254.7
[72] validation_0-rmse:29268.9
[73] validation_0-rmse:29244.3
[74] validation_0-rmse:29253.9
[75] validation_0-rmse:29198.6
[76] validation_0-rmse:29186.1
[77] validation_0-rmse:29192
[78] validation_0-rmse:29131.9
[79] validation_0-rmse:29134
[80] validation_0-rmse:29154.9
[81] validation_0-rmse:29162.2
[82] validation_0-rmse:29143.9
[83] validation_0-rmse:29154
[84] validation_0-rmse:29169.5
[85] validation_0-rmse:29174.1
[86] validation_0-rmse:29178.5
[87] validation_0-rmse:29184.7
[88] validation_0-rmse:29194.5
[89] validation_0-rmse:29174.5
[90] validation_0-rmse:29147
[91] validation_0-rmse:29153.5
[92] validation_0-rmse:29115.3
[93] validation_0-rmse:29092.4
[94] validation_0-rmse:29096.9
[95] validation_0-rmse:29049.2
[96] validation_0-rmse:29035.4
[97] validation_0-rmse:29027.6
[98] validation_0-rmse:29034.5
[99] validation_0-rmse:29029.3
[100] validation_0-rmse:29014.2
[101] validation_0-rmse:29019.8
[102] validation_0-rmse:29032.2
[103] validation_0-rmse:29058.4
[104] validation_0-rmse:29050.9
[105] validation_0-rmse:29044.3
[106] validation_0-rmse:29011.1
[107] validation_0-rmse:29000.5
[108] validation_0-rmse:28990
[109] validation_0-rmse:28989.8
[110] validation_0-rmse:28997.3
[111] validation_0-rmse:28955.4
[112] validation_0-rmse:28962.5
[113] validation_0-rmse:29004.7
[114] validation_0-rmse:28986.9
[115] validation_0-rmse:28965
[116] validation_0-rmse:28934.8
[117] validation_0-rmse:28924.8
[118] validation_0-rmse:28922
[119] validation_0-rmse:28897
[120] validation_0-rmse:28878.8
[121] validation_0-rmse:28889.1
[122] validation_0-rmse:28970.5
[123] validation_0-rmse:28983.8
[124] validation_0-rmse:28978
[125] validation_0-rmse:28970.3
[126] validation_0-rmse:28980.8
[127] validation_0-rmse:28975
[128] validation_0-rmse:28985.7
[129] validation_0-rmse:28977.6
[130] validation_0-rmse:28913.9
[131] validation_0-rmse:28911.1
[132] validation_0-rmse:28899.4
[133] validation_0-rmse:28920.1
[134] validation_0-rmse:28925.3
[135] validation_0-rmse:28943.1
[136] validation_0-rmse:28940.2
[137] validation_0-rmse:28950.3
[138] validation_0-rmse:28990.8
[139] validation_0-rmse:28985.3
[140] validation_0-rmse:29013.2
[141] validation_0-rmse:29006.1
[142] validation_0-rmse:29014
[143] validation_0-rmse:29032.2
[144] validation_0-rmse:29015.3
[145] validation_0-rmse:29015.4
[146] validation_0-rmse:29031.8
[147] validation_0-rmse:29023
[148] validation_0-rmse:29024.2
[149] validation_0-rmse:29044.8
[150] validation_0-rmse:29035.8
[151] validation_0-rmse:29046.7
[152] validation_0-rmse:29022.3
[153] validation_0-rmse:29024.6
[154] validation_0-rmse:29021.1
[155] validation_0-rmse:29028
[156] validation_0-rmse:29021
[157] validation_0-rmse:28965.1
[158] validation_0-rmse:28969.9
[159] validation_0-rmse:28930
[160] validation_0-rmse:28924.3
[161] validation_0-rmse:28929.7
[162] validation_0-rmse:28931.4
[163] validation_0-rmse:28925
[164] validation_0-rmse:28924.5
[165] validation_0-rmse:28921.3
[166] validation_0-rmse:28946.2
[167] validation_0-rmse:28943.3
[168] validation_0-rmse:28940.3
[169] validation_0-rmse:28947.2
[170] validation_0-rmse:28935.8
[171] validation_0-rmse:28938.7
[172] validation_0-rmse:28932.8
[173] validation_0-rmse:28934.6
[174] validation_0-rmse:28928.2
[175] validation_0-rmse:28928.9
[176] validation_0-rmse:28902.6
[177] validation_0-rmse:28912.8
[178] validation_0-rmse:28920
[179] validation_0-rmse:28922.6
[180] validation_0-rmse:28921.1
[181] validation_0-rmse:28936.4
[182] validation_0-rmse:28928.9
[183] validation_0-rmse:28933.2
[184] validation_0-rmse:28943.1
[185] validation_0-rmse:28962.6
[186] validation_0-rmse:28954.3
[187] validation_0-rmse:28951.6
[188] validation_0-rmse:28974.4
[189] validation_0-rmse:28970.6
[190] validation_0-rmse:28963.1
[191] validation_0-rmse:28939.3
[192] validation_0-rmse:28924.6
[193] validation_0-rmse:28928.5
[194] validation_0-rmse:28901.6
[195] validation_0-rmse:28909.4
[196] validation_0-rmse:28916.1
[197] validation_0-rmse:28901.4
[198] validation_0-rmse:28938.1
[199] validation_0-rmse:28940.1
[200] validation_0-rmse:28932.4
[201] validation_0-rmse:28934.9
[202] validation_0-rmse:28936.3
[203] validation_0-rmse:28922.4
[204] validation_0-rmse:28926.8
[205] validation_0-rmse:28932
[206] validation_0-rmse:28939.5
[207] validation_0-rmse:28921.5
[208] validation_0-rmse:28919.8
[209] validation_0-rmse:28924.3
[210] validation_0-rmse:28925.9
[211] validation_0-rmse:28919.2
[212] validation_0-rmse:28917.9
[213] validation_0-rmse:28917.9
[214] validation_0-rmse:28912.3
[215] validation_0-rmse:28898.2
[216] validation_0-rmse:28912.9
[217] validation_0-rmse:28916.5
[218] validation_0-rmse:28914.3
[219] validation_0-rmse:28911.6
[220] validation_0-rmse:28895.6
[221] validation_0-rmse:28901.2
[222] validation_0-rmse:28905.6
[223] validation_0-rmse:28926.4
[224] validation_0-rmse:28944.4
[225] validation_0-rmse:28926.4
[226] validation_0-rmse:28925.8
[227] validation_0-rmse:28930.3
[228] validation_0-rmse:28955.5
[229] validation_0-rmse:28927.8
[230] validation_0-rmse:28928.9
[231] validation_0-rmse:28921.9
[232] validation_0-rmse:28915.9
[233] validation_0-rmse:28908.6
[234] validation_0-rmse:28903.8
[235] validation_0-rmse:28896.9
[236] validation_0-rmse:28888.9
[237] validation_0-rmse:28880.6
[238] validation_0-rmse:28879.3
[239] validation_0-rmse:28883
[240] validation_0-rmse:28876.1
[241] validation_0-rmse:28869.3
[242] validation_0-rmse:28859.6
[243] validation_0-rmse:28858.3
[244] validation_0-rmse:28852.6
[245] validation_0-rmse:28853.7
[246] validation_0-rmse:28840.7
[247] validation_0-rmse:28847.1
[248] validation_0-rmse:28857.7
[249] validation_0-rmse:28852
[250] validation_0-rmse:28854.6
[251] validation_0-rmse:28820.1
[252] validation_0-rmse:28805
[253] validation_0-rmse:28828.3
[254] validation_0-rmse:28833.4
[255] validation_0-rmse:28827
[256] validation_0-rmse:28824.3
[257] validation_0-rmse:28828.1
[258] validation_0-rmse:28825.4
[259] validation_0-rmse:28820.9
[260] validation_0-rmse:28820.4
[261] validation_0-rmse:28822.9
[262] validation_0-rmse:28825.8
[263] validation_0-rmse:28825.9
[264] validation_0-rmse:28835.6
[265] validation_0-rmse:28834.3
[266] validation_0-rmse:28829.8
[267] validation_0-rmse:28814.6
[268] validation_0-rmse:28819.8
[269] validation_0-rmse:28823.9
[270] validation_0-rmse:28819.2
[271] validation_0-rmse:28826.2
[272] validation_0-rmse:28853.1
[273] validation_0-rmse:28859.1
[274] validation_0-rmse:28866.2
[275] validation_0-rmse:28866.5
[276] validation_0-rmse:28867.7
[277] validation_0-rmse:28870.8
[278] validation_0-rmse:28876.8
[279] validation_0-rmse:28870.4
[280] validation_0-rmse:28887.9
[281] validation_0-rmse:28894.4
[282] validation_0-rmse:28908.6
[283] validation_0-rmse:28893
[284] validation_0-rmse:28901.3
[285] validation_0-rmse:28904.4
[286] validation_0-rmse:28913.4
[287] validation_0-rmse:28907.5
[288] validation_0-rmse:28914.9
[289] validation_0-rmse:28915.4
[290] validation_0-rmse:28908.2
[291] validation_0-rmse:28903.2
[292] validation_0-rmse:28901
[293] validation_0-rmse:28898.2
[294] validation_0-rmse:28898.6
[295] validation_0-rmse:28909.1
[296] validation_0-rmse:28914.4
[297] validation_0-rmse:28925.6
[298] validation_0-rmse:28924
[299] validation_0-rmse:28920.3
[300] validation_0-rmse:28926.2
[301] validation_0-rmse:28929.9
[302] validation_0-rmse:28928.4
[303] validation_0-rmse:28933.1
[304] validation_0-rmse:28920.7
[305] validation_0-rmse:28915.5
[306] validation_0-rmse:28922.2
[307] validation_0-rmse:28917
[308] validation_0-rmse:28918.5
[309] validation_0-rmse:28918.4
[310] validation_0-rmse:28917.9
[311] validation_0-rmse:28926.3
[312] validation_0-rmse:28892.9
[313] validation_0-rmse:28885.2
[314] validation_0-rmse:28890.8
[315] validation_0-rmse:28894.7
[316] validation_0-rmse:28881.6
[317] validation_0-rmse:28858.4
[318] validation_0-rmse:28860.7
[319] validation_0-rmse:28857.3
[320] validation_0-rmse:28858.5
[321] validation_0-rmse:28858
[322] validation_0-rmse:28847.7
[323] validation_0-rmse:28851.3
[324] validation_0-rmse:28856.9
[325] validation_0-rmse:28866.9
[326] validation_0-rmse:28865.9
[327] validation_0-rmse:28869.5
[328] validation_0-rmse:28871
[329] validation_0-rmse:28874.1
[330] validation_0-rmse:28863.4
[331] validation_0-rmse:28854.3
[332] validation_0-rmse:28857.5
[333] validation_0-rmse:28854.1
[334] validation_0-rmse:28851.4
[335] validation_0-rmse:28852.2
[336] validation_0-rmse:28877.6
[337] validation_0-rmse:28872.9
[338] validation_0-rmse:28876.8
[339] validation_0-rmse:28877.4
[340] validation_0-rmse:28884.2
[341] validation_0-rmse:28886.1
[342] validation_0-rmse:28885.9
[343] validation_0-rmse:28887.1
[344] validation_0-rmse:28894.4
[345] validation_0-rmse:28894.3
[346] validation_0-rmse:28896.1
[347] validation_0-rmse:28893.6
[348] validation_0-rmse:28893.9
[349] validation_0-rmse:28883.1
[350] validation_0-rmse:28893.8
[351] validation_0-rmse:28889.1
[352] validation_0-rmse:28897.9
[353] validation_0-rmse:28896.1
[354] validation_0-rmse:28894
[355] validation_0-rmse:28890.4
[356] validation_0-rmse:28899.3
[357] validation_0-rmse:28896.8
[358] validation_0-rmse:28906.3
[359] validation_0-rmse:28908.2
[360] validation_0-rmse:28902.9
[361] validation_0-rmse:28896.6
[362] validation_0-rmse:28902
[363] validation_0-rmse:28897.8
[364] validation_0-rmse:28908.5
[365] validation_0-rmse:28908.7
[366] validation_0-rmse:28909.6
[367] validation_0-rmse:28917.4
[368] validation_0-rmse:28918.3
[369] validation_0-rmse:28912.8
[370] validation_0-rmse:28907.3
[371] validation_0-rmse:28903.3
[372] validation_0-rmse:28901.6
[373] validation_0-rmse:28899.8
[374] validation_0-rmse:28901.6
[375] validation_0-rmse:28894.2
[376] validation_0-rmse:28894.8
[377] validation_0-rmse:28888.4
[378] validation_0-rmse:28881.4
[379] validation_0-rmse:28894.2
[380] validation_0-rmse:28893.6
[381] validation_0-rmse:28879.3
[382] validation_0-rmse:28877
[383] validation_0-rmse:28879.1
[384] validation_0-rmse:28892.3
[385] validation_0-rmse:28900.9
[386] validation_0-rmse:28877.5
[387] validation_0-rmse:28882.3
[388] validation_0-rmse:28895.5
[389] validation_0-rmse:28897.4
[390] validation_0-rmse:28907.4
[391] validation_0-rmse:28910.5
[392] validation_0-rmse:28911.6
[393] validation_0-rmse:28911.6
[394] validation_0-rmse:28915.3
[395] validation_0-rmse:28914.9
[396] validation_0-rmse:28910.1
[397] validation_0-rmse:28910.6
[398] validation_0-rmse:28913.2
[399] validation_0-rmse:28913.3
[400] validation_0-rmse:28913.4
[401] validation_0-rmse:28911.5
[402] validation_0-rmse:28910.4
[403] validation_0-rmse:28916.3
[404] validation_0-rmse:28908.8
[405] validation_0-rmse:28905.1
[406] validation_0-rmse:28904.4
[407] validation_0-rmse:28913.4
[408] validation_0-rmse:28908.5
[409] validation_0-rmse:28905.5
[410] validation_0-rmse:28899.5
[411] validation_0-rmse:28900.5
[412] validation_0-rmse:28902.3
[413] validation_0-rmse:28900.6
[414] validation_0-rmse:28891.7
[415] validation_0-rmse:28890.1
[416] validation_0-rmse:28890.3
[417] validation_0-rmse:28891.3
[418] validation_0-rmse:28886
[419] validation_0-rmse:28879.5
[420] validation_0-rmse:28874.5
[421] validation_0-rmse:28888.2
[422] validation_0-rmse:28892.8
[423] validation_0-rmse:28891.8
[424] validation_0-rmse:28890.3
[425] validation_0-rmse:28890.7
[426] validation_0-rmse:28890.3
[427] validation_0-rmse:28890.3
[428] validation_0-rmse:28890
[429] validation_0-rmse:28881.1
[430] validation_0-rmse:28880
[431] validation_0-rmse:28876.4
[432] validation_0-rmse:28890.2
[433] validation_0-rmse:28887.9
[434] validation_0-rmse:28885.6
[435] validation_0-rmse:28906.9
[436] validation_0-rmse:28902.8
[437] validation_0-rmse:28902.6
[438] validation_0-rmse:28904.9
[439] validation_0-rmse:28905.2
[440] validation_0-rmse:28902.1
[441] validation_0-rmse:28901.7
[442] validation_0-rmse:28907
[443] validation_0-rmse:28904.5
[444] validation_0-rmse:28901.2
[445] validation_0-rmse:28900.6
[446] validation_0-rmse:28903.4
[447] validation_0-rmse:28899.7
[448] validation_0-rmse:28898.9
[449] validation_0-rmse:28910.5
[450] validation_0-rmse:28912.7
[451] validation_0-rmse:28913.9
[452] validation_0-rmse:28910.5
[453] validation_0-rmse:28919.7
[454] validation_0-rmse:28923.3
[455] validation_0-rmse:28929.6
[456] validation_0-rmse:28925.7
[457] validation_0-rmse:28923.8
[458] validation_0-rmse:28929.6
[459] validation_0-rmse:28925.1
[460] validation_0-rmse:28930.4
[461] validation_0-rmse:28927.4
[462] validation_0-rmse:28917.7
[463] validation_0-rmse:28909.1
[464] validation_0-rmse:28907.8
[465] validation_0-rmse:28907.8
[466] validation_0-rmse:28906.8
[467] validation_0-rmse:28906.3
[468] validation_0-rmse:28904
[469] validation_0-rmse:28898.2
[470] validation_0-rmse:28897.3
[471] validation_0-rmse:28901.4
[472] validation_0-rmse:28900.3
[473] validation_0-rmse:28901.9
[474] validation_0-rmse:28898.7
[475] validation_0-rmse:28897.2
[476] validation_0-rmse:28897.3
[477] validation_0-rmse:28893.1
[478] validation_0-rmse:28891.9
[479] validation_0-rmse:28891.6
[480] validation_0-rmse:28902.1
[481] validation_0-rmse:28904.4
[482] validation_0-rmse:28902
[483] validation_0-rmse:28897.6
[484] validation_0-rmse:28895.8
[485] validation_0-rmse:28897.3
[486] validation_0-rmse:28901.6
[487] validation_0-rmse:28892.9
[488] validation_0-rmse:28888.8
[489] validation_0-rmse:28884.8
[490] validation_0-rmse:28877.5
[491] validation_0-rmse:28878.4
[492] validation_0-rmse:28875.8
[493] validation_0-rmse:28878.2
[494] validation_0-rmse:28876.5
[495] validation_0-rmse:28870.1
[496] validation_0-rmse:28869.7
[497] validation_0-rmse:28870.7
[498] validation_0-rmse:28868.4
[499] validation_0-rmse:28875
[500] validation_0-rmse:28874
[501] validation_0-rmse:28872.3
[502] validation_0-rmse:28868.3
[503] validation_0-rmse:28869.3
[504] validation_0-rmse:28870.8
[505] validation_0-rmse:28870.6
[506] validation_0-rmse:28868.3
[507] validation_0-rmse:28866.3
[508] validation_0-rmse:28872.7
[509] validation_0-rmse:28870.1
[510] validation_0-rmse:28869.5
[511] validation_0-rmse:28871.1
[512] validation_0-rmse:28863.8
[513] validation_0-rmse:28865.1
[514] validation_0-rmse:28860.9
[515] validation_0-rmse:28857.9
[516] validation_0-rmse:28855.7
[517] validation_0-rmse:28853.2
[518] validation_0-rmse:28855.5
[519] validation_0-rmse:28861.7
[520] validation_0-rmse:28861.7
[521] validation_0-rmse:28859.4
[522] validation_0-rmse:28860.4
[523] validation_0-rmse:28860.2
[524] validation_0-rmse:28857.7
[525] validation_0-rmse:28857.9
[526] validation_0-rmse:28857.7
[527] validation_0-rmse:28855.6
[528] validation_0-rmse:28855.5
[529] validation_0-rmse:28856.4
[530] validation_0-rmse:28856.7
[531] validation_0-rmse:28860.3
[532] validation_0-rmse:28860.2
[533] validation_0-rmse:28859.3
[534] validation_0-rmse:28863.8
[535] validation_0-rmse:28848.5
[536] validation_0-rmse:28846.4
[537] validation_0-rmse:28850.4
[538] validation_0-rmse:28849.5
[539] validation_0-rmse:28847.7
[540] validation_0-rmse:28846.1
[541] validation_0-rmse:28835.8
[542] validation_0-rmse:28839.5
[543] validation_0-rmse:28837.2
[544] validation_0-rmse:28834.1
[545] validation_0-rmse:28840.6
[546] validation_0-rmse:28839.2
[547] validation_0-rmse:28845.4
[548] validation_0-rmse:28838.7
[549] validation_0-rmse:28839.2
[550] validation_0-rmse:28846.2
[551] validation_0-rmse:28849.6
[552] validation_0-rmse:28842.5
[553] validation_0-rmse:28841.8
[554] validation_0-rmse:28841.2
[555] validation_0-rmse:28847.9
[556] validation_0-rmse:28841.4
[557] validation_0-rmse:28836.6
[558] validation_0-rmse:28831.8
[559] validation_0-rmse:28827.7
[560] validation_0-rmse:28825.7
[561] validation_0-rmse:28823.8
[562] validation_0-rmse:28822.1
[563] validation_0-rmse:28816.9
[564] validation_0-rmse:28816.1
[565] validation_0-rmse:28814.3
[566] validation_0-rmse:28814.5
[567] validation_0-rmse:28812.2
[568] validation_0-rmse:28812.5
[569] validation_0-rmse:28814.9
[570] validation_0-rmse:28806.5
[571] validation_0-rmse:28809.9
[572] validation_0-rmse:28807.4
[573] validation_0-rmse:28807.3
[574] validation_0-rmse:28796.5
[575] validation_0-rmse:28799.7
[576] validation_0-rmse:28796
[577] validation_0-rmse:28790.6
[578] validation_0-rmse:28789.5
[579] validation_0-rmse:28790.1
[580] validation_0-rmse:28788.8
[581] validation_0-rmse:28784.5
[582] validation_0-rmse:28780
[583] validation_0-rmse:28778.9
[584] validation_0-rmse:28777.6
[585] validation_0-rmse:28774.6
[586] validation_0-rmse:28773.2
[587] validation_0-rmse:28776.1
[588] validation_0-rmse:28775.8
[589] validation_0-rmse:28780.4
[590] validation_0-rmse:28783.6
[591] validation_0-rmse:28785.2
[592] validation_0-rmse:28778.1
[593] validation_0-rmse:28775.9
[594] validation_0-rmse:28774.9
[595] validation_0-rmse:28771.2
[596] validation_0-rmse:28768.8
[597] validation_0-rmse:28773.4
[598] validation_0-rmse:28773.1
[599] validation_0-rmse:28772.1
[600] validation_0-rmse:28775
[601] validation_0-rmse:28786.1
[602] validation_0-rmse:28786.5
[603] validation_0-rmse:28794.3
[604] validation_0-rmse:28793.5
[605] validation_0-rmse:28790.4
[606] validation_0-rmse:28787.6
[607] validation_0-rmse:28789.8
[608] validation_0-rmse:28792.1
[609] validation_0-rmse:28791.5
[610] validation_0-rmse:28790.3
[611] validation_0-rmse:28796.5
[612] validation_0-rmse:28797.9
[613] validation_0-rmse:28799.8
[614] validation_0-rmse:28801
[615] validation_0-rmse:28800.9
[616] validation_0-rmse:28805.8
[617] validation_0-rmse:28801.8
[618] validation_0-rmse:28798.6
[619] validation_0-rmse:28797.5
[620] validation_0-rmse:28799
[621] validation_0-rmse:28800.5
[622] validation_0-rmse:28803.4
[623] validation_0-rmse:28802.3
[624] validation_0-rmse:28802.3
[625] validation_0-rmse:28801.9
[626] validation_0-rmse:28801.9
[627] validation_0-rmse:28801
[628] validation_0-rmse:28795.4
[629] validation_0-rmse:28790.9
[630] validation_0-rmse:28791.1
[631] validation_0-rmse:28791
[632] validation_0-rmse:28782.3
[633] validation_0-rmse:28780.3
[634] validation_0-rmse:28779.4
[635] validation_0-rmse:28780.9
[636] validation_0-rmse:28777.3
[637] validation_0-rmse:28773.7
[638] validation_0-rmse:28779
[639] validation_0-rmse:28782.5
[640] validation_0-rmse:28781.6
[641] validation_0-rmse:28781.3
[642] validation_0-rmse:28781.7
[643] validation_0-rmse:28781.5
[644] validation_0-rmse:28785.5
[645] validation_0-rmse:28781.3
[646] validation_0-rmse:28780.2
[647] validation_0-rmse:28780.2
[648] validation_0-rmse:28778.8
[649] validation_0-rmse:28776
[650] validation_0-rmse:28774.1
[651] validation_0-rmse:28776.9
[652] validation_0-rmse:28777
[653] validation_0-rmse:28774.8
[654] validation_0-rmse:28777.4
[655] validation_0-rmse:28781.1
[656] validation_0-rmse:28776.8
[657] validation_0-rmse:28775.4
[658] validation_0-rmse:28771
[659] validation_0-rmse:28770.4
[660] validation_0-rmse:28778.1
[661] validation_0-rmse:28769.4
[662] validation_0-rmse:28767
[663] validation_0-rmse:28764.2
[664] validation_0-rmse:28766.6
[665] validation_0-rmse:28765.8
[666] validation_0-rmse:28760.7
[667] validation_0-rmse:28763.4
[668] validation_0-rmse:28758.1
[669] validation_0-rmse:28760.9
[670] validation_0-rmse:28761.8
[671] validation_0-rmse:28759.2
[672] validation_0-rmse:28757.9
[673] validation_0-rmse:28758.3
[674] validation_0-rmse:28760.6
[675] validation_0-rmse:28764.9
[676] validation_0-rmse:28769.8
[677] validation_0-rmse:28774.3
[678] validation_0-rmse:28774
[679] validation_0-rmse:28774.8
[680] validation_0-rmse:28774.2
[681] validation_0-rmse:28774.6
[682] validation_0-rmse:28764.7
[683] validation_0-rmse:28764
[684] validation_0-rmse:28764
[685] validation_0-rmse:28765.3
[686] validation_0-rmse:28764.3
[687] validation_0-rmse:28764.1
[688] validation_0-rmse:28761.1
[689] validation_0-rmse:28760.1
[690] validation_0-rmse:28763.4
[691] validation_0-rmse:28763.4
[692] validation_0-rmse:28764
[693] validation_0-rmse:28759.3
[694] validation_0-rmse:28758.3
[695] validation_0-rmse:28755.2
[696] validation_0-rmse:28754.8
[697] validation_0-rmse:28756.4
[698] validation_0-rmse:28758.5
[699] validation_0-rmse:28759.7
XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
max_depth=3, min_child_weight=1, missing=None, n_estimators=700,
n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
silent=True, subsample=1)
n_jobs: 大数据集上并行化
# Same score() evaluation as for the default model, now for the 700-round model on the held-out split.
improved_model.score(X_test, y_test)
0.8453871314482886
# MAE (mean absolute error) of the 700-round model on the held-out split.
# This is not the rmse printed in the training log.
# Arguments follow scikit-learn's documented order (y_true, y_pred).
mean_absolute_error(y_test, improved_model.predict(X_test))
16569.6632844917
XGBRegressor/XGBClassifier(booster='gbtree', #gbtree树模型做基学习器(默认) booster='gblinear'线性模型做基学习器
n_estimators=700, #700个基学习器,迭代700轮(fit中展示训练情况,然后不断试)
silent=True, #不输出中间结果(默认),沉默嘛
learning_rate=0.1, #学习率(步长) 默认0.1
objective='reg:linear', #目标函数
#回归任务:
# reg:linear--默认(平方误差回归;新版本XGBoost中更名为reg:squarederror)
# reg:logistic--逻辑回归
#分类任务:
# binary:logistic--二分类 返回概率
# binary:logitraw--二分类 返回logistic变换之前的原始得分(不是类别)
# multi:softmax--多分类 使用softmax的多分类器,返回预测的类别。需要多设一个参数:num_class(类别数目)
# multi:softprob--多分类 返回的是每个数据属于各个类别的概率。需要多设一个参数:num_class(类别数目)
max_depth=3,
min_child_weight=1, #子节点所需的最小样本权重和(hessian之和):若一次分裂得到的子节点权重和小于该值,则放弃这次分裂;值越大模型越保守
n_jobs=1,
random_state=0,
scale_pos_weight=1, #正(positive)样本权重:当正负样本比例失衡时,设置正样本的权重
reg_alpha=0, #L1正则化系数
reg_lambda=1, #L2正则化系数
subsample=1) #随机采样的样本比例
method fit in module xgboost.sklearn:
help(improved_model.fit)
improved_model.fit( X, #feature matrix array_like
y, #labels array_like
sample_weight, #样本权重 array_like
eval_set, #验证集 list,可选,训练集拟合,用验证集估计误差,若验证集误差不再降低,则early-stopping
sample_weight_eval_set, #验证集的样本权重 list,可选
eval_metric, #评估度量方法
#回归任务:rmse--均方根误差(默认)
# mae--平均绝对误差
#分类任务:error--错误率(二分类)(默认)
# auc--ROC曲线下面积
# merror--错误率(多分类)
# logloss--负对数似然函数(二分类)
# mlogloss--负对数似然函数(多分类)
early_stopping_rounds, #int 早停 在验证集上,当连续迭代r次,误差没有降低,提前停止迭代
verbose, #bool 为True,则输出每轮的evaluation metric情况
xgb_model) #str 已保存的模型文件路径(或Booster对象),在其基础上继续训练