下面以 xgboost 为例,给出训练并评估模型的主脚本:
from time import time
import xgboost as xgb
import utils
def main():
    """Train and evaluate a default ``XGBClassifier`` on every project.

    For each project the data is loaded once via ``utils.load_data``; a fresh
    classifier is then fitted and scored ``repeat_times`` times and the four
    indicators returned by ``utils.cal_indicators`` are averaged. The
    per-project averages, the average over all projects and the elapsed
    wall-clock time are printed to stdout.
    """
    t = time()
    projects = ['xxx1', 'xxx2', 'xxx3', 'xxx4',
                'xxx5', 'xxx6', 'xxx7', 'xxx8', 'xxx9', 'xxx10']
    repeat_times = 10

    def report(name, acc, prec, rec, f1):
        # One output line per entry, every indicator with four decimals.
        print(name, 'Acc:', f"{acc:.4f}", "Precision=", f"{prec:.4f}",
              "Recall=", f"{rec:.4f}", "F1-score=", f"{f1:.4f}")

    res = {}  # project name -> [accuracy, precision, recall, f1] averages
    overall = [0, 0, 0, 0]
    for project in projects:
        train_y, train_x, test_y, test_x = utils.load_data(project)

        run_totals = [0, 0, 0, 0]
        for _ in range(repeat_times):
            clf = xgb.XGBClassifier()
            clf.fit(train_x, train_y)
            preds = clf.predict(test_x)
            a, p, r, f = utils.cal_indicators(preds=preds, labels=test_y)
            run_totals = [total + score
                          for total, score in zip(run_totals, (a, p, r, f))]

        project_avg = [total / repeat_times for total in run_totals]
        res[project] = project_avg  # save average APRF
        overall = [total + avg for total, avg in zip(overall, project_avg)]

    # Mean of the per-project averages.
    overall = [total / len(projects) for total in overall]

    for project, indicators in res.items():
        report(project, indicators[0], indicators[1],
               indicators[2], indicators[3])
    report('Average', overall[0], overall[1], overall[2], overall[3])
    print('cost time:', time() - t)


if __name__ == '__main__':
    main()
以下代码位于 utils.py 中:其中的 load_data 函数需要自己实现,cal_indicators 函数的实现如下:
def cal_indicators(preds, labels):
    """Compute accuracy, precision, recall and F1 for binary predictions.

    Label 1 is the positive class and label 0 the negative class. A pair in
    which either value is neither 0 nor 1 is not counted at all.

    Args:
        preds: Sequence of predicted labels.
        labels: Indexable sequence of true labels; ``labels[i]`` is compared
            with the i-th element of ``preds``.

    Returns:
        Tuple ``(accuracy, precision, recall, f1)`` of floats. A metric whose
        denominator is zero (e.g. precision when nothing was predicted
        positive, or every metric for empty input) is reported as 0.0
        instead of raising ``ZeroDivisionError``.
    """
    tp, tn, fp, fn = 0., 0., 0., 0.
    for i, pred in enumerate(preds):
        label = labels[i]
        if pred == 1 and label == 1:
            tp += 1
        elif pred == 0 and label == 0:
            tn += 1
        elif pred == 1 and label == 0:
            # Predicted positive but actually negative: false positive.
            fp += 1
        elif pred == 0 and label == 1:
            # Predicted negative but actually positive: false negative.
            fn += 1

    def ratio(numerator, denominator):
        # An undefined ratio is reported as 0.0 rather than raising.
        return numerator / denominator if denominator else 0.0

    accuracy = ratio(tp + tn, tp + tn + fp + fn)
    precision = ratio(tp, tp + fp)
    recall = ratio(tp, tp + fn)
    f1 = ratio(2 * precision * recall, precision + recall)
    return accuracy, precision, recall, f1