CIFAR-10: load the data, take a subset, and analyze it with kNN, SVM, and PCA

# -*- coding: utf-8 -*-
"""
Created on Wed May 27 22:56:16 2020

@author: guangjie2333

"""


"""

库调用

"""
import numpy as np
import pickle
import matplotlib.pyplot as plt
import PIL.Image as image
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from sklearn.decomposition import PCA
from sklearn import svm
"""

内部函数

"""

# Read a pickled CIFAR-10 batch file and return its dictionary
def unpickle(file):
    with open(file, 'rb') as fo:
        batch = pickle.load(fo, encoding='bytes')
    return batch



""""
主函数
"""   

if __name__ == '__main__':  
    
    # ------ Problem 1 ------
    
    # (x_train,y_train),(x_test,y_test) = cifar10.load_data()
    
    # Load the data files
    batches = unpickle('batches.meta')
    # print(batches.keys())
    name = batches.get(b'label_names')  # keys are bytes (b'...') because the pickle was loaded with encoding='bytes'
    # print(name[0])

    data_batch1 = unpickle('data_batch_1')
    data_batch2 = unpickle('data_batch_2')
    data_batch3 = unpickle('data_batch_3')
    data_batch4 = unpickle('data_batch_4')
    data_batch5 = unpickle('data_batch_5')
    test_batch  = unpickle('test_batch')
    
    # print(data_batch1.keys())
    
    
    # Training data
    # axis=1 appends to the right (more columns), axis=0 appends downward (more rows)
    x_train = np.concatenate([data_batch1[b'data'],
                              data_batch2[b'data'],
                              data_batch3[b'data'],
                              data_batch4[b'data'],
                              data_batch5[b'data']],axis=0)
    
    y_train = np.concatenate([data_batch1.get(b'labels'),
                              data_batch2.get(b'labels'),
                              data_batch3.get(b'labels'),
                              data_batch4.get(b'labels'),
                              data_batch5.get(b'labels')],axis=0)
    
    
    # Test data
    x_test = test_batch.get(b'data')
    
    y_test = test_batch.get(b'labels')
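
    # Optional sanity check: each batch holds 10,000 rows of 3,072 values (32*32*3),
    # so the concatenated arrays should have these shapes (uncomment to verify):
    # print(x_train.shape)   # (50000, 3072)
    # print(y_train.shape)   # (50000,)
    # print(x_test.shape)    # (10000, 3072)
    # print(len(y_test))     # 10000 (test labels come back as a plain list)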

    


 
    # Visualize x_train by class, one column per class
    
    classifle = 10   # number of classes to plot
    
    picture_num = 5  # images to draw per class
    
    classplot = 0    # class currently being plotted
    
    classplot_y = 0  # index (0-4) of the image already drawn for the current class
    
    # # Draw a single color image (kept for reference)
    # # img0 = x_train[0]
    # # img_reshape = img0.reshape(3,32,32)
    # # r = image.fromarray(img_reshape[0]).convert('L')
    # # g = image.fromarray(img_reshape[1]).convert('L')
    # # b = image.fromarray(img_reshape[2]).convert('L')
    # # img_m = image.merge('RGB',(r,g,b))
    # # plt.imshow(img_m)
    # # plt.show()
 
    # ------ Problem 2 ------
    plt.figure(figsize=(5, 10))
    
    for classplot in range(classifle):
        j = 0
        # Scan the 50,000 training labels (in practice only the first few are needed)
        for i in y_train:
            # Found an image of the class we are currently plotting
            if i == classplot:
                if classplot_y:
                    sub = plt.subplot(picture_num, classifle, classplot_y * classifle + classplot + 1)
                else:
                    # First row: use the class name (stored as bytes) as the column title
                    sub = plt.subplot(picture_num, classifle, classplot_y * classifle + classplot + 1,
                                      title=name[classplot].decode())

                sub.axis("off")
                img0 = x_train[j]
                img_reshape = img0.reshape(3, 32, 32)
                r = image.fromarray(img_reshape[0]).convert('L')
                g = image.fromarray(img_reshape[1]).convert('L')
                b = image.fromarray(img_reshape[2]).convert('L')
                img_m = image.merge('RGB', (r, g, b))
                sub.imshow(img_m)
                classplot_y = classplot_y + 1

            j = j + 1

            if classplot_y >= picture_num:
                classplot_y = 0
                break
    
    
    
    # Alternative: if the data is loaded directly (e.g. via keras cifar10.load_data()), this approach works
    # x_train = x_train.reshape(50000, 3, 32, 32)
    
    # for classplot in range(classifle):
    #     pos = np.argwhere(y_train == classplot)[0:picture_num,0]
    #     for i in range(picture_num):
    #         plt.subplot(picture_num, classifle, i * classifle + classplot+1)
    #         plt.imshow(x_train[pos[i]])
    #         if i == 0:
    #             plt.title(name[classplot])
    #         plt.axis('off')   
                
                
    # ------ Problem 3: keep only the first 500 samples ------
    x_train = x_train[0:500]
    y_train = y_train[0:500]
    x_test = x_test[0:500]
    y_test = y_test[0:500]
    
    
    
    # ------ Problem 4: kNN on the raw pixels ------
    neigh = KNeighborsClassifier(n_neighbors=1)
    neigh.fit(x_train, y_train)
    y_test_predict = neigh.predict(x_test)
    
    # Compute accuracy on the test subset
    print("Accuracy1", metrics.accuracy_score(y_test, y_test_predict))
    
    
    # ------ Problem 5: choosing k ------
    
    # After trying several values of k, n_neighbors=1 gave the best accuracy on this subset
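    # A minimal sketch of that search (assumes the 500-sample subset from Problem 3;
    # uncomment to reproduce the comparison):
    # for k in [1, 3, 5, 7, 9]:
    #     knn_k = KNeighborsClassifier(n_neighbors=k)
    #     knn_k.fit(x_train, y_train)
    #     print("k =", k, "accuracy =",
    #           metrics.accuracy_score(y_test, knn_k.predict(x_test)))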
    
    
    # ------ Problem 6: PCA down to 2 components ------
    
    pca = PCA(n_components=2)
    pca.fit(x_train) 
    x_train_reduction = pca.transform(x_train)
    x_test_reduction  = pca.transform(x_test)
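
    # Optional check (not in the original script): explained_variance_ratio_ shows how much
    # of the pixel variance the 2 retained components capture, which helps interpret the
    # accuracy drop measured below.
    # print("explained variance ratio:", pca.explained_variance_ratio_,
    #       "total:", pca.explained_variance_ratio_.sum())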
    
    
    # ------ Problem 7: kNN on the PCA-reduced data ------
    knn_pca = KNeighborsClassifier(n_neighbors=1)
    knn_pca.fit(x_train_reduction,y_train)
    y_test_predict = knn_pca.predict(x_test_reduction)
        
    # Compute accuracy
    print("Accuracy2", metrics.accuracy_score(y_test, y_test_predict))
        
        
    # ------ Problem 8: SVM with an RBF kernel on the PCA-reduced data ------
    clf = svm.SVC(kernel = 'rbf', C = 1000, gamma=0.5)
    clf.fit(x_train_reduction,y_train)
    y_test_predict = clf.predict(x_test_reduction)
    
    # Compute accuracy
    print("Accuracy3", metrics.accuracy_score(y_test, y_test_predict))



    # PCA reduces the dimensionality, which speeds up the later computations, but at the cost of accuracy.
    # After the reduction, both kNN and SVM accuracy dropped, but SVM dropped more. This suggests kNN is
    # less sensitive to the reduction to a few principal components, while SVM is affected more strongly.
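
    # A minimal sketch of that trade-off (assumes the same 500-sample subset; the
    # n_components values are illustrative, uncomment to run):
    # for n in [2, 10, 50, 100]:
    #     pca_n = PCA(n_components=n).fit(x_train)
    #     xtr, xte = pca_n.transform(x_train), pca_n.transform(x_test)
    #     knn_n = KNeighborsClassifier(n_neighbors=1).fit(xtr, y_train)
    #     svm_n = svm.SVC(kernel='rbf', C=1000, gamma=0.5).fit(xtr, y_train)
    #     print(n,
    #           metrics.accuracy_score(y_test, knn_n.predict(xte)),
    #           metrics.accuracy_score(y_test, svm_n.predict(xte)))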












