from sklearn import datasets
from sklearn.model_selection import KFold
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
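
# Record accuracy, weighted F1, and ROC AUC for one fold of a classifier.
# Note: ROC AUC is computed here from the hard class predictions rather than
# from predicted probabilities, so it is a coarser estimate of the curve.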
def result(pred, y_test, name):
    acc = metrics.accuracy_score(y_test, pred)
    acc_dict[name].append(acc)
    f1 = metrics.f1_score(y_test, pred, average='weighted')
    f1_dict[name].append(f1)
    auc = metrics.roc_auc_score(y_test, pred)
    auc_dict[name].append(auc)
def gaussian(X_train, y_train, X_test, y_test):
    clf = GaussianNB()
    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)
    result(pred, y_test, 'gaussian')
def SVM(X_train, y_train, X_test, y_test):
    clf = SVC(C=1e-01, kernel='rbf', gamma=0.1)
    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)
    result(pred, y_test, 'SVM')
def random_forest(X_train, y_train, X_test, y_test):
    clf = RandomForestClassifier(n_estimators=6)
    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)
    result(pred, y_test, 'random_forest')
def print_evaluation(score_dict, name):
    for clf_name, scores in score_dict.items():
        print(name, 'of', clf_name + ':', scores)
    print('\n')
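
# Generate a synthetic binary classification problem (1000 samples, 10 features)
# and evaluate each classifier with shuffled 10-fold cross-validation.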
sample = datasets.make_classification(n_samples=1000, n_features=10)
data, label = sample
kf = KFold(n_splits=10, shuffle=True)
acc_dict = {'gaussian':[], 'SVM':[], 'random_forest':[]}
f1_dict = {'gaussian':[], 'SVM':[], 'random_forest':[]}
auc_dict = {'gaussian':[], 'SVM':[], 'random_forest':[]}
for train_index, test_index in kf.split(data):
    X_train, y_train = data[train_index], label[train_index]
    X_test, y_test = data[test_index], label[test_index]
    gaussian(X_train, y_train, X_test, y_test)
    SVM(X_train, y_train, X_test, y_test)
    random_forest(X_train, y_train, X_test, y_test)
print_evaluation(acc_dict, 'accuracy')
print_evaluation(f1_dict, 'f1_score')
print_evaluation(auc_dict, 'AUC ROC')
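
# Optional follow-up: average each metric over the 10 folds to compare the
# classifiers at a glance (a small sketch, not required by the script above).
for name, scores in [('accuracy', acc_dict), ('f1_score', f1_dict), ('AUC ROC', auc_dict)]:
    for clf_name, values in scores.items():
        print('mean', name, 'of', clf_name + ':', sum(values) / len(values))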