机器学习(Python 代码):训练四种学习算法并输出 AUC

不多说,直接上代码。为了避免读者直接照抄而不去理解,我没有把相关库的导入语句放上来;代码中用到的类和函数来自 pandas 与 scikit-learn,请自行查阅文档并补全对应的 import。

def load_data(data_path=None, label_path=None):
    """Load features and labels, join them, and return a train/test split.

    Both files are read with ``pd.read_csv``, left-joined on the ``USRID``
    column (feature table on the left), and ``USRID`` is dropped afterwards.
    Every remaining column except ``FLAG`` is used as a feature; ``FLAG`` is
    the label.

    Args:
        data_path: Path or file-like object of the feature CSV. For other
            file formats, swap in the matching ``pd.read_*`` reader.
        label_path: Path or file-like object of the label CSV.

    Returns:
        The output of ``train_test_split`` on the feature and label arrays,
        i.e. ``X_train, X_test, y_train, y_test``.

    Raises:
        ValueError: If either path is not supplied.
    """
    # The original called pd.read_csv() with no argument, which can never
    # succeed; fail early with a message that says what is missing instead.
    if data_path is None or label_path is None:
        raise ValueError(
            "load_data requires both data_path and label_path to be provided"
        )

    data = pd.read_csv(data_path)
    label = pd.read_csv(label_path)

    # NOTE(review): with a left join, feature rows whose USRID has no match
    # in the label file end up with a missing FLAG -- confirm the label file
    # covers every USRID before relying on the stratified split below.
    merged = data.merge(label, how="left", on="USRID").drop(columns=["USRID"])

    feature_columns = [name for name in merged.columns if name != "FLAG"]
    data_array = merged[feature_columns].values  # feature matrix
    label_array = merged["FLAG"].values  # label vector

    # Fixed seed for a reproducible 75/25 split; stratify keeps the FLAG
    # class proportions the same in the train and test parts.
    return train_test_split(
        data_array,
        label_array,
        test_size=0.25,
        random_state=81,
        stratify=label_array,
    )

    
def test_decision_tree(*data):
    """Fit an entropy decision tree (max depth 5) and print its metrics.

    ``data`` must contain exactly four items, unpacked in the order
    ``X_train, X_test, y_train, y_test``. Prints the classifier's
    ``score`` on the training and test sets, then the ROC AUC on the test
    set. Returns nothing.
    """
    train_x, test_x, train_y, test_y = data

    tree = DecisionTreeClassifier(criterion="entropy", max_depth=5)
    tree.fit(train_x, train_y)

    train_score = tree.score(train_x, train_y)
    print("decision_tree:training score:{:.4f}".format(train_score))
    print('-' * 30)
    test_score = tree.score(test_x, test_y)
    print("decision_tree:testing score:{:.4f}".format(test_score))

    # The second column of predict_proba is used as the ranking score.
    test_proba = tree.predict_proba(test_x)[:, 1]
    print("decision_tree_auc_test", roc_auc_score(test_y, test_proba))

def test_adaboost(*data):
    """Fit a default ``AdaBoostClassifier`` and print its metrics.

    ``data`` must contain exactly four items, unpacked in the order
    ``X_train, X_test, y_train, y_test``. Prints the classifier's
    ``score`` on the training and test sets, then the ROC AUC on the test
    set. Returns nothing.
    """
    train_x, test_x, train_y, test_y = data

    booster = AdaBoostClassifier()
    booster.fit(train_x, train_y)

    train_score = booster.score(train_x, train_y)
    print("adaboost:training score:{:.4f}".format(train_score))
    test_score = booster.score(test_x, test_y)
    print("adaboost:testing score:{:.4f}".format(test_score))

    # The second column of predict_proba is used as the ranking score.
    test_proba = booster.predict_proba(test_x)[:, 1]
    print("adaboost_auc_test", roc_auc_score(test_y, test_proba))

def test_logistic(*data):
    """Fit a liblinear logistic regression and print its metrics.

    ``data`` must contain exactly four items, unpacked in the order
    ``X_train, X_test, y_train, y_test``. Prints the classifier's
    ``score`` on the training and test sets, then the ROC AUC on the test
    set. Returns nothing.
    """
    train_x, test_x, train_y, test_y = data

    model = linear_model.LogisticRegression(solver="liblinear")
    model.fit(train_x, train_y)

    train_score = model.score(train_x, train_y)
    print("logistic:training score:{:.4f}".format(train_score))
    test_score = model.score(test_x, test_y)
    print("logistic:testing score:{:.4f}".format(test_score))

    # The second column of predict_proba is used as the ranking score.
    test_proba = model.predict_proba(test_x)[:, 1]
    print("logistic_auc_test", roc_auc_score(test_y, test_proba))

def test_rf(*data):
    """Fit an entropy random forest and print its metrics.

    ``data`` must contain exactly four items, unpacked in the order
    ``X_train, X_test, y_train, y_test``. Prints the classifier's
    ``score`` on the training and test sets, then the ROC AUC on the test
    set followed by the ROC AUC on the training set. Returns nothing.
    """
    train_x, test_x, train_y, test_y = data

    forest = RandomForestClassifier(criterion="entropy")
    forest.fit(train_x, train_y)

    train_score = forest.score(train_x, train_y)
    print("rf:training score:{:.4f}".format(train_score))
    test_score = forest.score(test_x, test_y)
    print("rf:testing score:{:.4f}".format(test_score))

    # The second column of predict_proba is used as the ranking score.
    test_proba = forest.predict_proba(test_x)[:, 1]
    print("rf_auc_test", roc_auc_score(test_y, test_proba))

    # Training-set AUC is printed as well, for comparison with the test AUC.
    train_proba = forest.predict_proba(train_x)[:, 1]
    train_auc = roc_auc_score(train_y, train_proba)
    print("rf_auc_train", train_auc)

如果代码有帮助到你们,麻烦点个赞,有啥问题直接评论留言哦,我看到就会回复的,希望能帮到大家,代码有什么需要改进的尽管说,我也要多多学习呢~

你可能感兴趣的:(笔记,python,机器学习)