MNIST1_sklearn.ensemble集成模型训练

使用 sklearn 中的集成模型对 MNIST 数据集进行训练和测试。
部分脚本如下(完整脚本见笔者的 GitHub):

import pandas as pd 
import numpy as np
from sklearn.datasets import fetch_mldata
import warnings
warnings.filterwarnings(action='ignore')

def get_ministdata():
    """Load the 'MNIST original' dataset into a single DataFrame.

    The feature array is divided by 255 and the target array is appended
    as the last column.

    NOTE(review): ``fetch_mldata`` is reported as removed in newer
    scikit-learn releases (``fetch_openml`` being the replacement) --
    confirm against the installed version.

    Returns:
        pandas.DataFrame: scaled features followed by one target column.
    """
    # Directory where the mnist-original.mat file was downloaded.
    mat_dir = r'..'
    bunch = fetch_mldata('MNIST original', data_home=mat_dir)
    scaled_pixels = bunch['data'] / 255
    labels = bunch['target']
    return pd.DataFrame(np.column_stack((scaled_pixels, labels)))


def sklearn_clf(clf_model_func, tr, te):
    """Fit a default-constructed classifier on ``tr`` and score it on ``te``.

    Args:
        clf_model_func: Zero-argument callable (typically an estimator
            class) returning an object with ``fit(X, y)`` and
            ``predict(X)``. Its ``__name__`` is used in the report.
        tr: Training DataFrame; the last column holds the labels and all
            preceding columns are features.
        te: Test DataFrame laid out the same way as ``tr``.

    Returns:
        str: ``'model: <name>, acc: <pct>'`` where ``<pct>`` is the
        percentage of test rows predicted correctly, with two decimals.
    """
    x_tr, y_tr = tr.iloc[:, :-1].values, tr.iloc[:, -1].values
    x_te, y_te = te.iloc[:, :-1].values, te.iloc[:, -1].values

    clf_model = clf_model_func()
    clf_model.fit(x_tr, y_tr)
    pred = clf_model.predict(x_te)

    # Count the matches inside NumPy; the builtin sum() would walk the
    # boolean array one element at a time in Python.
    acc_ = np.count_nonzero(pred == y_te) / len(y_te) * 100
    return f'model: {clf_model_func.__name__}, acc: {acc_:.2f}'

import sklearn as skl
# Import the submodule explicitly: `import sklearn` on its own does not
# guarantee that `skl.ensemble` is available as an attribute.
import sklearn.ensemble
if __name__ == '__main__':
    mnistdf = get_ministdata()
    # NOTE(review): 80% of the rows are sampled into the *test* set and only
    # the remaining 20% are used for training -- confirm this is intended.
    te_index = mnistdf.sample(frac=0.8).index
    mnist_te = mnistdf.loc[te_index, :]
    mnist_tr = mnistdf.drop(index=te_index)

    # Train and predict with every ensemble classifier that can be built
    # with no arguments. Voting/Stacking classifiers need an explicit
    # `estimators` argument, so they are skipped.
    needs_estimators = ('Voting', 'Stacking')
    ensemble_func_lst = [
        name for name in dir(skl.ensemble)
        if 'Classifier' in name
        and not any(tag in name for tag in needs_estimators)
    ]
    print(ensemble_func_lst)
    for clf_ in ensemble_func_lst:
        print(f'test clf_: {clf_}')
        # Plain attribute lookup; no need to eval() a constructed expression.
        msg = sklearn_clf(getattr(skl.ensemble, clf_), mnist_tr, mnist_te)
        print(msg)
"""
sklearn_clf, take_time:43.97123s >> model: AdaBoostClassifier, acc: 70.88
sklearn_clf, take_time:62.52457s >> model: BaggingClassifier, acc: 91.86
sklearn_clf, take_time:3.11310s >> model: ExtraTreesClassifier, acc: 92.34
sklearn_clf, take_time:1510.23123s >> model: GradientBoostingClassifier, acc: 93.48
sklearn_clf, take_time:3.57081s >> model: RandomForestClassifier, acc: 91.63
"""

你可能感兴趣的:(MNIST,机器学习)