class Ensemable(object):
    """Two-level stacking ensemble.

    Every base model is fitted fold by fold; its predictions on the held-out
    part of each fold become one column of the level-two training matrix,
    and its per-fold predictions on the target set are averaged into the
    matching column of the level-two test matrix. The stacker is then
    fitted on the level-two training matrix and used to score the target
    set.
    """

    def __init__(self, n_splits, stacker, base_models,
                 random_state=2017, inner_cv=3, scoring="roc_auc"):
        """Store the ensemble configuration.

        Args:
            n_splits: Number of stratified folds used to build the
                out-of-fold predictions.
            stacker: Level-two estimator; must provide ``fit`` and
                ``predict_proba``.
            base_models: Sized iterable of level-one estimators providing
                ``fit`` and ``predict_proba``.
            random_state: Seed handed to ``StratifiedKFold`` for shuffling.
            inner_cv: ``cv`` argument of the ``cross_val_score`` calls.
            scoring: ``scoring`` argument of the ``cross_val_score`` calls.
        """
        self.n_splits = n_splits
        self.stacker = stacker
        self.base_models = base_models
        self.random_state = random_state
        self.inner_cv = inner_cv
        self.scoring = scoring

    def fit_predict(self, x, y, t):
        """Fit the base models and the stacker, then score ``t``.

        Args:
            x: Training features (anything ``np.asarray`` accepts).
            y: Training labels aligned with the rows of ``x``.
            t: Features of the samples to predict.

        Returns:
            1-D array holding column 1 of the stacker's ``predict_proba``
            output for every row of ``t``.

        Raises:
            ValueError: If ``base_models`` is empty.
        """
        n_models = len(self.base_models)
        if n_models == 0:
            # Without this guard the stacker would be fitted on a matrix
            # with zero columns and fail far from the real cause.
            raise ValueError("base_models must contain at least one model")

        x = np.asarray(x)
        y = np.asarray(y)
        t = np.asarray(t)

        # Materialise the folds once so every base model sees identical
        # splits.
        splitter = StratifiedKFold(
            n_splits=self.n_splits,
            shuffle=True,
            random_state=self.random_state,
        )
        folds = list(splitter.split(x, y))

        s_train = np.zeros((x.shape[0], n_models))
        s_test = np.zeros((t.shape[0], n_models))

        for i, clf in enumerate(self.base_models):
            s_test_i = np.zeros((t.shape[0], self.n_splits))
            for j, (train_inx, test_inx) in enumerate(folds):
                x_train = x[train_inx]
                y_train = y[train_inx]
                x_test = x[test_inx]

                print("Fit %s fold %d" % (str(clf).split('(')[0], j + 1))
                clf.fit(x_train, y_train)

                # Diagnostic only: the score is printed and not used below.
                score = cross_val_score(
                    clf, x_train, y_train,
                    cv=self.inner_cv, scoring=self.scoring,
                )
                print("cross_score: %.5f" % (score.mean()))

                # Out-of-fold predictions: each training row is filled by
                # the fit that did not train on it.
                s_train[test_inx, i] = clf.predict_proba(x_test)[:, 1]
                # Column 1 of predict_proba (probability of predicting 1).
                s_test_i[:, j] = clf.predict_proba(t)[:, 1]

            # Average this model's target-set predictions across the folds.
            s_test[:, i] = s_test_i.mean(axis=1)

        result = cross_val_score(
            self.stacker, s_train, y,
            cv=self.inner_cv, scoring=self.scoring,
        )
        print("Stacker score: %.5f" % (result.mean()))

        self.stacker.fit(s_train, y)
        return self.stacker.predict_proba(s_test)[:, 1]