import numpy as np
import xgboost as xgb
# Load the agaricus training data file into an xgboost DMatrix.
# NOTE(review): basePath is not defined in this excerpt; it must be set
# earlier and, because it is joined by plain string concatenation, it has to
# end with a path separator.
dtrain = xgb.DMatrix(basePath+'data/agaricus.txt.train')
# Booster parameters reused by the cross-validation calls that follow until
# `param` is rebuilt for the custom-objective run.
# NOTE(review): 'silent' presumably suppresses library logging -- confirm
# against the installed xgboost version.
param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'}
# Number of boosting rounds for the short cross-validation runs.
num_round = 2
print('running cross validation')
running cross validation
# 5-fold cross validation for num_round boosting rounds, scored by
# classification error, with a fixed seed (seed=0).
# The print_evaluation callback is built with show_stdv=True, so each logged
# round shows the metric as mean+std.
xgb.cv(param, dtrain, num_round, nfold=5,
metrics={'error'}, seed=0,
callbacks=[xgb.callback.print_evaluation(show_stdv=True)])
[0] train-error:0.0506682+0.009201 test-error:0.0557316+0.0158887
[1] train-error:0.0213034+0.00205561 test-error:0.0211884+0.00365323
   test-error-mean  test-error-std  train-error-mean  train-error-std
0         0.055732        0.015889          0.050668         0.009201
1         0.021188        0.003653          0.021303         0.002056
# Section banner for the run that logs per-round metrics without the
# standard deviation.
print('running cross validation, disable standard deviation display')
running cross validation, disable standard deviation display
# 5-fold CV again, now with up to 10 boosting rounds; the returned per-round
# history is kept in `res` for printing afterwards.
# Callbacks: log each round without the standard deviation (show_stdv=False)
# and stop early once the monitored metric has not improved for 3 rounds.
res = xgb.cv(param, dtrain, num_boost_round=10, nfold=5,
metrics={'error'}, seed=0,
callbacks=[xgb.callback.print_evaluation(show_stdv=False),
xgb.callback.early_stop(3)])
[0] train-error:0.0506682 test-error:0.0557316
Multiple eval metrics have been passed: 'test-error' will be used for early stopping.
Will train until test-error hasn't improved in 3 rounds.
[1] train-error:0.0213034 test-error:0.0211884
[2] train-error:0.0099418 test-error:0.0099786
[3] train-error:0.0141256 test-error:0.0144336
[4] train-error:0.0059878 test-error:0.0062948
[5] train-error:0.0020344 test-error:0.0016886
[6] train-error:0.0012284 test-error:0.001228
[7] train-error:0.0012284 test-error:0.001228
[8] train-error:0.0009212 test-error:0.001228
[9] train-error:0.0006142 test-error:0.001228
Stopping. Best iteration:
[6] train-error:0.0012284+0.000260265 test-error:0.001228+0.00104094
# Show the cross-validation history stored in `res` by the early-stopped run.
print(res)
   test-error-mean  test-error-std  train-error-mean  train-error-std
0         0.055732        0.015889          0.050668         0.009201
1         0.021188        0.003653          0.021303         0.002056
2         0.009979        0.004828          0.009942         0.006076
3         0.014434        0.003517          0.014126         0.001706
4         0.006295        0.003123          0.005988         0.001878
5         0.001689        0.000574          0.002034         0.001470
6         0.001228        0.001041          0.001228         0.000260
# Section banner for the run that hands a preprocessing function to xgb.cv.
print('running cross validation, with preprocessing function')
running cross validation, with preprocessing function
def fpreproc(dtrain, dtest, param):
    """Preprocessing hook for ``xgb.cv`` that rebalances the two classes.

    Computes the negative/positive label ratio of ``dtrain`` and stores it
    as ``scale_pos_weight`` in the parameters that are returned.

    Parameters
    ----------
    dtrain : object exposing ``get_label()``
        Training split; only its labels are read.
    dtest : object
        Evaluation split; passed through untouched.
    param : dict
        Booster parameters. This dict is NOT modified; a copy is returned.

    Returns
    -------
    tuple
        ``(dtrain, dtest, new_param)`` where ``new_param`` is a copy of
        ``param`` with ``scale_pos_weight`` set. If the training split has
        no positive labels the ratio is undefined, so ``scale_pos_weight``
        is left exactly as the caller supplied it (or absent).
    """
    label = dtrain.get_label()
    n_neg = np.sum(label == 0)
    n_pos = np.sum(label == 1)

    # Copy so that a dict shared between calls (or reused by the caller
    # afterwards) never picks up a weight computed for a different split.
    new_param = dict(param)

    # Guard the division: with zero positives the ratio would be inf/nan,
    # which is not a meaningful class weight.
    if n_pos > 0:
        new_param['scale_pos_weight'] = float(n_neg) / n_pos
    return (dtrain, dtest, new_param)
# 5-fold CV scored by AUC. fpreproc is handed to xgb.cv as the preprocessing
# hook; it returns parameters carrying a scale_pos_weight derived from the
# labels of the training data it is given.
xgb.cv(param, dtrain, num_round, nfold=5,
metrics={'auc'}, seed=0, fpreproc=fpreproc)
   test-auc-mean  test-auc-std  train-auc-mean  train-auc-std
0       0.958232      0.005778        0.958228       0.001442
1       0.981431      0.002595        0.981414       0.000647
# Section banner for the run with a user-defined objective and metric.
# NOTE(review): 'cutomsized' in the message is a typo for 'customized'; it is
# left unchanged here because the recorded output echoes the same text.
print('running cross validation, with cutomsized loss function')
running cross validation, with cutomsized loss function
def logregobj(preds, dtrain):
    """Custom logistic-loss objective.

    Squashes the incoming scores through the sigmoid and returns the
    per-example gradient (``p - y``) and hessian (``p * (1 - p)``) as a
    ``(grad, hess)`` pair.

    ``preds`` is a numpy array of scores; ``dtrain`` only needs to expose
    ``get_label()`` returning the matching labels.
    """
    y_true = dtrain.get_label()
    prob = 1.0 / (1.0 + np.exp(-preds))  # sigmoid of the incoming scores
    return prob - y_true, prob * (1.0 - prob)
def evalerror(preds, dtrain):
    """Custom evaluation metric: fraction of misclassified examples.

    A score is counted as a positive prediction when it is ``> 0.0``, i.e.
    the scores are thresholded at zero rather than at 0.5.
    NOTE(review): this presumes ``preds`` are untransformed margin scores,
    as is usual when a custom objective is supplied -- confirm.

    Parameters
    ----------
    preds : numpy.ndarray
        Scores to threshold.
    dtrain : object exposing ``get_label()``
        Source of the true labels.

    Returns
    -------
    tuple
        ``('error', rate)`` with ``rate`` a Python float in ``[0, 1]``.
    """
    labels = dtrain.get_label()
    # Count mismatches inside numpy instead of iterating the boolean array
    # element by element with the builtin sum(); the count is identical.
    n_wrong = np.sum(labels != (preds > 0.0))
    return 'error', float(n_wrong) / len(labels)
# Rebuild the parameter dict without an 'objective' entry: the loss is
# supplied through obj=logregobj and the metric through feval=evalerror.
# NOTE(review): the recorded output also lists rmse columns, presumably the
# default metric of the fallback built-in objective -- confirm.
param = {'max_depth':2, 'eta':1, 'silent':1}
xgb.cv(param, dtrain, num_round, nfold=5, seed=0,
obj=logregobj, feval=evalerror)
   test-error-mean  test-error-std  test-rmse-mean  test-rmse-std  train-error-mean  train-error-std  train-rmse-mean  train-rmse-std
0         0.055732        0.015889        1.598043       0.012826          0.050668         0.009201         1.595072        0.003868
1         0.021188        0.003653        2.449282       0.080900          0.021303         0.002056         2.442600        0.076834