# Machine-learning experiments in Python for beginners
# (decision tree / naive Bayes / neural network / SVM / random forest / discriminant analysis)

import numpy as np
import pandas as pd
from sklearn import tree
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_score,cross_validate
from time import time

# 1. Cross-validation
# Evaluate a single classifier with 10-fold cross-validation on the
# mushroom (agaricus-lepiota) dataset.  Column 0 holds the class label;
# every remaining column is an (already digitised) feature.
data = pd.read_csv('datasets/agaricus-lepiota_dropEmpty_dig.csv', header=None)
Y = data[0]
X = data[data.columns[1:]]

# Pick exactly one classifier: uncomment a single `clf = ...` line below.
# Decision tree (entropy criterion, ID3-flavoured)
# clf = tree.DecisionTreeClassifier(class_weight=None, criterion='entropy', max_depth=None, max_features=10, max_leaf_nodes=None, min_samples_leaf=3, min_samples_split=2, min_weight_fraction_leaf=0.0, presort=False, random_state=1, splitter='random')
# CART (gini criterion)
# clf = tree.DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None, max_features=10, max_leaf_nodes=None, min_samples_leaf=3, min_samples_split=2, min_weight_fraction_leaf=0.0, presort=False, random_state=1, splitter='random')
# Gaussian naive Bayes
# clf = GaussianNB()
# K-nearest neighbours
# clf = KNeighborsClassifier(3)
# Random forest
# clf = RandomForestClassifier(max_depth=8, random_state=1)
# Support vector machine
# clf = SVC(kernel='rbf', C=1.0)
# Multi-layer perceptron
# clf = MLPClassifier(hidden_layer_sizes=20, activation='relu', max_iter=200, random_state=1)
# AdaBoost — the currently active choice
clf = AdaBoostClassifier(n_estimators=10)

# Time the full 10-fold CV run and report per-fold plus mean accuracy.
start = time()
scores = cross_val_score(clf, X, Y, cv=10)
elapsed = time() - start
print(elapsed)        # wall-clock seconds for the whole CV run
print(scores)         # accuracy of each of the 10 folds
print(scores.mean())  # mean cross-validated accuracy

#2. Hold-out evaluation: split one dataset into train and test subsets
# from sklearn.model_selection import train_test_split
# data=pd.read_csv('datasets/agaricus-lepiota_dropEmpty_dig.csv',header=None)
# Y=data[0]
# X=data[data.columns[1:]]
# x_train,x_test,y_train,y_test=train_test_split(X,Y,test_size=0.25,random_state=1)


#3. Pre-separated data: train and test sets loaded from distinct files (HCV dataset)
# train_data=pd.read_excel('HCV_train.xls',header=None)
# train_feature=train_data[train_data.columns[:train_data.shape[1]-1]].values
# train_label=list(train_data[train_data.columns[train_data.shape[1]-1]])
# clf = tree.DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None, max_features=7, max_leaf_nodes=None, min_samples_leaf=3, min_samples_split=2, min_weight_fraction_leaf=0.0, presort=False, random_state=1, splitter='random')
# # clf = GaussianNB()
# # clf=KNeighborsClassifier(2)
# # clf=GaussianProcessClassifier(1.0 * RBF(1.0), warm_start=True)
#
# clf = clf.fit(train_feature,train_label)
#
# test_data=pd.read_excel('HCV_test.xls',header=None)
# test_feature=test_data[test_data.columns[:test_data.shape[1]-1]].values
# test_label=list(test_data[test_data.columns[test_data.shape[1]-1]])
# test_pred=clf.predict(test_feature)
# print(np.mean(test_pred==test_label))
# print(confusion_matrix(test_label, test_pred))
# print(classification_report(test_label, test_pred))

# Related topics: notes, python, common tricks, machine learning, decision tree, naive Bayes, neural network