传统机器学习模型对比

from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import MultinomialNB
import numpy as np
from sklearn.utils import shuffle
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score, recall_score, f1_score
#读取数据,数据采用sklearn自带的数据集
# digits=load_digits()
# data=digits.data


# y_test = to_categorical(y_test)
# Split the data: 10% is held out as the test set (test_size=0.1), the rest is used for training
# train_x,test_x,train_y,test_y=train_test_split(data,digits.target,test_size=0.1,random_state=1)
#采用z-score规范化
# ss=preprocessing.StandardScaler()
# train_ss_x=ss.fit_transform(train_x)
# test_ss_x=ss.transform(test_x)
def _report(accuracy_template, accuracy, y_true, y_pred):
    """Print one classifier's scores, one value per line.

    The first line is ``accuracy_template`` formatted with ``accuracy``;
    the next three are precision, recall and F1, each computed with
    ``average="weighted"``.
    """
    print(accuracy_template.format(accuracy))
    for metric in (precision_score, recall_score, f1_score):
        print(metric(y_true, y_pred, average="weighted"))


# No z-score scaling is applied here: the features are used as-is
# (the variable names simply keep the "_ss_" suffix).
train_ss_x, test_ss_x = train_x, test_x

# K-nearest-neighbours classifier.
# Averaging note from the original author: "macro" for balanced data,
# "weighted" for weighted data; see
# https://www.cnblogs.com/danny92/p/10675897.html
knn = KNeighborsClassifier()
knn.fit(train_ss_x, train_y)
predict_y = knn.predict(test_ss_x)
_report('KNN准确率:{:.4f}', accuracy_score(test_y, predict_y),
        test_y, predict_y)

# Support-vector-machine classifier.
svm = SVC()
svm.fit(train_ss_x, train_y)
predict_y = svm.predict(test_ss_x)
_report('SVM准确率:{:.4f}', accuracy_score(predict_y, test_y),
        test_y, predict_y)

# Min-Max normalization: the scaler is fitted on the training features
# and then applied to the test features.
mm = preprocessing.MinMaxScaler()
train_mm_x = mm.fit_transform(train_x)
test_mm_x = mm.transform(test_x)

# Multinomial naive Bayes classifier, trained on the Min-Max-scaled features.
mnb = MultinomialNB()
mnb.fit(train_mm_x, train_y)
predict_y = mnb.predict(test_mm_x)
_report('多项式朴素贝叶斯准确率:{:.4f}', accuracy_score(predict_y, test_y),
        test_y, predict_y)

# CART decision-tree classifier, trained on the unscaled features.
dtc = DecisionTreeClassifier()
dtc.fit(train_ss_x, train_y)
predict_y = dtc.predict(test_ss_x)
_report('cart决策树准确率:{:.4f}', accuracy_score(predict_y, test_y),
        test_y, predict_y)

模型对比

from sklearn.model_selection import GridSearchCV, train_test_split

def _presence_vector(values, width=1000):
    """Return a ``width``-long list of 0/1 flags for ``values``.

    Position ``k`` is 1 when ``values[k]`` is not equal to 0.0 and 0
    otherwise; positions past ``len(values)`` stay 0.

    Raises:
        IndexError: if ``values`` has more than ``width`` entries.
    """
    flags = [0] * width
    for position, value in enumerate(values):
        if value != 0.0:
            flags[position] = 1
    return flags


# Not used in this section; kept in case code elsewhere refers to it.
list_ = [0] * 1000

# Build the feature matrix: one 1000-long 0/1 row per sample in X, taken
# from the sample's first row (``sample[0, :]``).
# NOTE(review): assumes each sample is a 2-D array whose first row has at
# most 1000 entries -- confirm against where X is created.
data = np.array([_presence_vector(sample[0, :].tolist()) for sample in X])

# The split must run AFTER ``data`` has been built. Previously it was the
# first statement, so it ran before the feature matrix existed and its
# input was then overwritten by ``data = []``.
train_x, test_x, train_y, test_y = train_test_split(data, Y, test_size=0.1)

你可能感兴趣的:(数据采集与数据挖掘,知识图谱)