6.13 sklearn

6.13 sklearn_第1张图片

6.13 sklearn_第2张图片


机器学习相关内容(不是特别懂)

简单来说,就是先随机生成一个二分类数据集,然后分别用 3 种分类算法(朴素贝叶斯、支持向量机、随机森林)进行训练,最后通过 10 折交叉验证评估各个模型的效果(准确率、F1 值和 AUC)。

代码如下

from sklearn import datasets
from sklearn import model_selection
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics

# Build a synthetic two-class data set: 1000 samples with 10 features, of
# which 2 are informative and 2 are redundant.
X, Y = datasets.make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=2,
    n_redundant=2,
    n_repeated=0,
    n_classes=2,
)

# 10-fold cross-validation splitter shared by all three classifiers, so every
# model is trained and scored on exactly the same folds.
kf = model_selection.KFold(n_splits=10)

# Factories (not instances) so that each fold trains a fresh, unfitted
# estimator.  Insertion order is also the order used for fitting and reporting.
classifier_factories = {
    "GaussianNB": lambda: GaussianNB(),
    "SVC": lambda: SVC(C=1e00, kernel='rbf'),
    "RandomForestClassifier": lambda: RandomForestClassifier(n_estimators=100),
}

# Running per-classifier totals of each metric across the folds.
metric_totals = {
    name: {"acc": 0.0, "f1": 0.0, "auc": 0.0}
    for name in classifier_factories
}

n_folds = 0
for train_index, test_index in kf.split(X):
    n_folds += 1
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = Y[train_index], Y[test_index]

    for name, make_classifier in classifier_factories.items():
        clf = make_classifier()
        clf.fit(X_train, y_train)
        pred = clf.predict(X_test)

        totals = metric_totals[name]
        totals["acc"] += metrics.accuracy_score(y_test, pred)
        totals["f1"] += metrics.f1_score(y_test, pred)
        # NOTE(review): the AUC is computed from hard label predictions
        # (clf.predict), not from scores.  Feed predict_proba(...)[:, 1] or
        # decision_function(...) to roc_auc_score if a score-based AUC is
        # wanted.
        totals["auc"] += metrics.roc_auc_score(y_test, pred)

# Report the per-classifier averages over all folds: accuracy, F1, AUC (one
# value per line), with a blank line between classifiers.
for position, (name, totals) in enumerate(metric_totals.items()):
    if position:
        print()
    # Every header reads "<name> average result:"; the fold counter is no
    # longer printed in front of the SVC / RandomForestClassifier headers.
    print(name + " average result:")
    print(totals["acc"] / n_folds)
    print(totals["f1"] / n_folds)
    print(totals["auc"] / n_folds)

其中的一次结果如下:

GaussianNB average result:
0.9039999999999999
0.9041081812466965
0.9043535790181867


SVC average result:
0.9560000000000001
0.9562470859437677
0.955828927779609


RandomForestClassifier average result:
0.97
0.9703207038834705
0.9698537808461255


经过多次测试,通常情况下

准确度从小到大为:

朴素贝叶斯 < 支持向量机(SVC) < 随机森林


你可能感兴趣的:(作业)