[二分类模板]python对若干数据集重复10次实验取平均结果

这里以xgboost为例

from time import time
import xgboost as xgb
import utils
def main():
    """Benchmark a default ``XGBClassifier`` on each project and print the results.

    For every project name, the data is loaded once through
    ``utils.load_data`` (which must return ``train_y, train_x, test_y,
    test_x`` in that order), then a fresh classifier is fitted and scored
    ``runs_per_project`` times. The per-project mean of accuracy, precision,
    recall and F1 is printed, followed by the mean over all projects and the
    total wall-clock time.
    """
    started = time()
    dataset_names = ['xxx1', 'xxx2', 'xxx3', 'xxx4',
                     'xxx5', 'xxx6', 'xxx7', 'xxx8', 'xxx9', 'xxx10']
    runs_per_project = 10

    per_project = {}          # project name -> [accuracy, precision, recall, f1] means
    overall = [0, 0, 0, 0]    # running totals of the per-project means

    for name in dataset_names:
        train_y, train_x, test_y, test_x = utils.load_data(name)

        running = [0, 0, 0, 0]
        for _ in range(runs_per_project):
            model = xgb.XGBClassifier()
            model.fit(train_x, train_y)
            predicted = model.predict(test_x)
            acc, prec, rec, f1 = utils.cal_indicators(preds=predicted, labels=test_y)
            # Element-wise accumulation, added in the same order as the runs.
            running = [total + score
                       for total, score in zip(running, (acc, prec, rec, f1))]

        averaged = [total / runs_per_project for total in running]
        per_project[name] = averaged
        overall = [total + score for total, score in zip(overall, averaged)]

    overall = [total / len(dataset_names) for total in overall]

    for name, (acc, prec, rec, f1) in per_project.items():
        print(f"{name} Acc: {acc:.4f} Precision= {prec:.4f} "
              f"Recall= {rec:.4f} F1-score= {f1:.4f}")

    mean_acc, mean_prec, mean_rec, mean_f1 = overall
    print(f"Average Acc: {mean_acc:.4f} Precision= {mean_prec:.4f} "
          f"Recall= {mean_rec:.4f} F1-score= {mean_f1:.4f}")

    elapsed = time() - started
    print('cost time:', elapsed)
# Run the benchmark only when this file is executed as a script.
if __name__ == "__main__":
    main()

utils.py 中的 load_data 函数需要自己实现（需按 train_y, train_x, test_y, test_x 的顺序返回数据）；cal_indicators 函数的实现如下：

def cal_indicators(preds, labels):
    """Compute accuracy, precision, recall and F1 for binary predictions.

    Class ``1`` is treated as the positive class and ``0`` as the negative
    class. Elements are paired positionally; if the lengths differ, the
    surplus elements of the longer sequence are ignored. Pairs in which
    either value is neither 0 nor 1 are not counted.

    Args:
        preds: Iterable of predicted labels (0 or 1).
        labels: Iterable of ground-truth labels (0 or 1).

    Returns:
        A tuple ``(accuracy, precision, recall, f1)`` of floats. Any ratio
        whose denominator is zero (e.g. precision when nothing was predicted
        positive, or every metric for empty input) is reported as ``0.0``
        instead of raising ``ZeroDivisionError``.
    """
    tp = tn = fp = fn = 0
    for pred, label in zip(preds, labels):
        if pred == 1 and label == 1:
            tp += 1
        elif pred == 0 and label == 0:
            tn += 1
        elif pred == 1 and label == 0:
            # Predicted positive but actually negative -> false positive.
            fp += 1
        elif pred == 0 and label == 1:
            # Predicted negative but actually positive -> false negative.
            fn += 1

    total = tp + tn + fp + fn
    accuracy = (tp + tn) / total if total else 0.0
    precision = tp / (tp + fp) if (tp + fp) else 0.0
    recall = tp / (tp + fn) if (tp + fn) else 0.0
    # precision + recall is zero exactly when there are no true positives.
    if precision + recall:
        f1 = 2 * precision * recall / (precision + recall)
    else:
        f1 = 0.0

    return accuracy, precision, recall, f1

你可能感兴趣的:(python,python,分类,机器学习,二分类,平均)