【机器学习】PCA主成分分析应用 手写数字识别

from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import numpy as np
# Filename template: ./knn_num_data/<digit>/<digit>_<index>.bmp
path = "./knn_num_data/%s/%s_%s.bmp"

images = []
labels = []
for i in range(10):
    # Indices 1..500 are read for every digit, so each digit adds 500 labels.
    for j in range(1, 501):
        images.append(plt.imread(path % (i, i, j)))
    labels.extend([i] * 500)

# Estimators need 2-D input: one row per image, all remaining axes flattened.
data = np.array(images).reshape(len(images), -1)
target = np.array(labels)

# Variance filtering
from sklearn.feature_selection import VarianceThreshold

# Split the raw pixels first so the held-out images play no part in deciding
# which features are kept (fitting on all of `data` leaks test information).
X_train, X_test, y_train, y_test = train_test_split(data, target)

var = VarianceThreshold()
X_train = var.fit_transform(X_train)  # learn the feature mask on training rows only
X_test = var.transform(X_test)        # apply the same mask to the held-out rows

# Full dataset passed through the train-fitted filter; keeps `new_data` defined.
new_data = var.transform(data)

knn = KNeighborsClassifier(n_jobs=6).fit(X_train, y_train)
# (score on the training split, score on the held-out split)
knn.score(X_train, y_train), knn.score(X_test, y_test)

# Sample output from one run (train score, test score): (0.8013333333333333, 0.7096)


# Principal component analysis (PCA)

from sklearn.decomposition import PCA

# Split the raw pixels before fitting PCA. Fitting on all of `data` lets the
# held-out images shape the components and the whitening scale, which makes
# the reported test score optimistic.
X_train, X_test, y_train, y_test = train_test_split(data, target)

pca = PCA(n_components=150, whiten=True)
X_train = pca.fit_transform(X_train)  # components learned from training rows only
X_test = pca.transform(X_test)        # project held-out rows onto those components

# Full dataset projected with the train-fitted PCA; keeps `new_data` defined.
new_data = pca.transform(data)

svc = SVC(kernel='linear').fit(X_train, y_train)
# (score on the training split, score on the held-out split)
svc.score(X_train, y_train), svc.score(X_test, y_test)
# Sample output from one run (train score, test score): (1.0, 0.8776)

你可能感兴趣的:(【机器学习】PCA主成分分析应用 手写数字识别)