机器学习相关内容(不是特别懂)
简单来说,就是先随机生成一个二分类数据集,再分别用3种分类算法(朴素贝叶斯、支持向量机、随机森林)进行训练,最后通过10折交叉验证,用准确率、F1和AUC三个指标评估各模型的效果。
代码如下
from sklearn import datasets
from sklearn import model_selection
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
# Compare three classifiers on a randomly generated binary classification
# problem using 10-fold cross-validation, then print each model's mean
# accuracy, F1 and ROC AUC over the folds.

X, Y = datasets.make_classification(
    n_samples=1000, n_features=10,
    n_informative=2, n_redundant=2, n_repeated=0, n_classes=2)
kf = model_selection.KFold(n_splits=10)

# (display name, zero-argument factory) pairs. A factory is used instead of a
# shared instance so that every fold trains a brand-new, unfitted model.
CLASSIFIERS = [
    ("GaussianNB", lambda: GaussianNB()),
    ("SVC", lambda: SVC(C=1e00, kernel='rbf')),
    ("RandomForestClassifier",
     lambda: RandomForestClassifier(n_estimators=100)),
]


def _positive_class_scores(clf, X_test):
    """Return a continuous positive-class score for each row of X_test.

    ROC AUC ranks samples by score, so it needs a continuous output rather
    than the hard labels returned by ``predict``. ``decision_function`` is
    used when the fitted estimator has one; otherwise the second column of
    ``predict_proba`` is used.
    """
    if hasattr(clf, "decision_function"):
        return clf.decision_function(X_test)
    return clf.predict_proba(X_test)[:, 1]


def _evaluate_fold(clf, X_train, y_train, X_test, y_test):
    """Fit clf on the training split and score it on the test split.

    Returns:
        A tuple ``(accuracy, f1, auc)`` computed on the test split.
    """
    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)
    acc = metrics.accuracy_score(y_test, pred)
    f1 = metrics.f1_score(y_test, pred)
    # AUC is computed from continuous scores, not from thresholded labels.
    auc = metrics.roc_auc_score(y_test, _positive_class_scores(clf, X_test))
    return acc, f1, auc


# Running per-classifier sums of [accuracy, f1, auc] across the folds.
totals = {name: [0.0, 0.0, 0.0] for name, _ in CLASSIFIERS}

for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = Y[train_index], Y[test_index]
    for name, make_clf in CLASSIFIERS:
        fold_scores = _evaluate_fold(
            make_clf(), X_train, y_train, X_test, y_test)
        for k, value in enumerate(fold_scores):
            totals[name][k] += value

n_splits = kf.get_n_splits(X)
for position, (name, _) in enumerate(CLASSIFIERS):
    # A blank line separates consecutive classifiers' summaries.
    if position:
        print()
    print(name + " average result:")
    for metric_sum in totals[name]:
        print(metric_sum / n_splits)
其中的一次结果如下:
GaussianNB average result:
0.9039999999999999
0.9041081812466965
0.9043535790181867
SVC average result:
0.9560000000000001
0.9562470859437677
0.955828927779609
RandomForestClassifier average result:
0.97
0.9703207038834705
0.9698537808461255
经过多次测试,通常情况下
准确度从小到大为:
朴素贝叶斯