由于端午放假,我误以为昨天才是星期一,加上作业和课程设计的事情又多,就忘记把写好的代码发到博客上了,希望TA见谅。
作业题目
代码如下:
"""Compare Naive Bayes, RBF-SVM and Random Forest with 10-fold cross-validation.

A synthetic binary classification problem is generated, split into ten
shuffled folds, and every model is trained and scored on every fold.
Accuracy, F1 and ROC AUC are printed for each (fold, model) pair.
"""
# NOTE: sklearn.cross_validation was removed in scikit-learn 0.20;
# sklearn.model_selection is its replacement.
from sklearn import datasets, metrics, model_selection
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC


def positive_class_scores(clf, X):
    """Return a continuous score for the positive class of each row of X.

    ROC AUC ranks samples by score, so it needs a continuous value rather
    than the hard 0/1 labels returned by ``predict``. Classifiers exposing
    ``decision_function`` (SVC) use it; the others (GaussianNB,
    RandomForestClassifier) fall back to the probability of class 1.
    """
    if hasattr(clf, "decision_function"):
        return clf.decision_function(X)
    return clf.predict_proba(X)[:, 1]


def evaluate(header, clf, X_train, y_train, X_test, y_test):
    """Fit clf on the training split and print its metrics on the test split.

    ``header`` is a tuple of values printed on one line to identify the model
    (e.g. ``("SVM C_data", 0.1)``).
    """
    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)
    accuracy = metrics.accuracy_score(y_test, pred)
    f1 = metrics.f1_score(y_test, pred)
    auc_roc = metrics.roc_auc_score(y_test, positive_class_scores(clf, X_test))
    print("****************")
    print(*header)
    print("Accuracy:", accuracy)
    print("F1_score:", f1)
    print("AUC_ROC", auc_roc)
    print("****************")


def main():
    """Generate the data set and report every model on every fold."""
    # Unpack into X, y instead of rebinding the imported `datasets` module.
    X, y = datasets.make_classification(n_samples=1000, n_features=10)
    kfold = model_selection.KFold(n_splits=10, shuffle=True)

    for train_index, test_index in kfold.split(X):
        X_train, y_train = X[train_index], y[train_index]
        X_test, y_test = X[test_index], y[test_index]

        evaluate(("Naive Bayes",), GaussianNB(),
                 X_train, y_train, X_test, y_test)

        for C_data in [1e-02, 1e-01, 1e00, 1e01, 1e02]:
            evaluate(("SVM C_data", C_data),
                     SVC(C=C_data, kernel='rbf', gamma=0.1),
                     X_train, y_train, X_test, y_test)

        for n_data in [10, 100, 1000]:
            evaluate(("Random Forest n_data", n_data),
                     RandomForestClassifier(n_estimators=n_data),
                     X_train, y_train, X_test, y_test)


if __name__ == "__main__":
    main()
选取一个样例,各个方法的结果如下:
****************
Naive Bayes
Accuracy: 0.94
F1_score: 0.9423076923076923
AUC_ROC 0.9409875551987155
****************
****************
SVM C_data 0.01
Accuracy: 0.88
F1_score: 0.8888888888888888
AUC_ROC 0.8783621035728623
****************
****************
SVM C_data 0.1
Accuracy: 0.93
F1_score: 0.9345794392523364
AUC_ROC 0.9291449217181855
****************
****************
SVM C_data 1.0
Accuracy: 0.96
F1_score: 0.9622641509433962
AUC_ROC 0.9598554797270173
****************
****************
SVM C_data 10.0
Accuracy: 0.97
F1_score: 0.9719626168224299
AUC_ROC 0.9692894419911682
****************
****************
SVM C_data 100.0
Accuracy: 0.91
F1_score: 0.9142857142857143
AUC_ROC 0.9102769971898835
****************
****************
Random Forest n_data 10
Accuracy: 0.94
F1_score: 0.9411764705882353
AUC_ROC 0.9421918908069049
****************
****************
Random Forest n_data 100
Accuracy: 0.94
F1_score: 0.9423076923076923
AUC_ROC 0.9409875551987155
****************
****************
Random Forest n_data 1000
Accuracy: 0.94
F1_score: 0.9423076923076923
AUC_ROC 0.9409875551987155
****************