西瓜书《机器学习》课后答案——chapter10_10.6 PCA

试使用MATLAB的PCA函数对Yale人脸数据进行降维,并观察前20个特征向量所对应的图像。(本文没有使用MATLAB,而是用Python和NumPy自己实现了PCA。)

Yale人脸数据共包含166张图片,每张图片的大小为320*243,展平后是一个77760维的向量。X矩阵的大小为77760*166,约占103M字节(Python的float对象本身占用24字节,其中真正用于保存数值的空间只有8字节,这里暂且按每个元素8字节估算);协方差矩阵的大小为77760*77760,约占48G字节。这也太大了,内存里根本放不下!

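采用10.5中介绍的计算协方差矩阵的特征值和特征向量的方法,曲线救国:不直接分解77760*77760的 $XX^\top$,而是先求166*166的 $X^\top X$ 的特征值 $\lambda$ 和特征向量 $v$。由 $X^\top X v = \lambda v$ 两边左乘 $X$ 可得 $XX^\top (Xv) = \lambda (Xv)$,所以 $Xv$ 就是 $XX^\top$ 对应于同一特征值 $\lambda$ 的特征向量,再把它归一化为单位向量即可。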

代码:

# -*- coding:gbk -*-
""" @Author: Victoria @Date: 2017.10.30 9:30 """
import cv2
import numpy as np
import operator
import os
import scipy.io as sio
from PIL import Image, ImageDraw
import sklearn.decomposition

class PCA():
    """Principal component analysis for data with many more features than samples.

    Instead of decomposing the (n, n) matrix ``X X^T`` directly, ``train``
    decomposes the much smaller (N, N) matrix ``X^T X`` and maps its
    eigenvectors back to feature space with ``X v``.

    Attributes set by ``train``:
        mean_vector: per-feature mean of the training data, shape (n, 1).
        std: per-feature standard deviation plus a small epsilon, shape (n, 1).
            It is computed for reference only; the data is not scaled by it.
        max_eigen_vectors: the ``d`` leading (un-normalised) directions as a
            list of ``d`` lists of length n.
        W: projection matrix with unit-length columns, shape (n, d).
    """

    def __init__(self, d):
        """Params: d: number of principal components to keep."""
        self.d = d

    def train(self, X):
        """Fit the projection matrix ``self.W``.

        Params:
            X: array-like with shape (n, N): n features, N samples (one
               sample per column).

        Raises:
            ValueError: if ``d`` is not in the range 1..N.
        """
        X = np.array(X, dtype="float64")
        X = self.normalize(X)

        num_samples = X.shape[1]
        if not 0 < self.d <= num_samples:
            raise ValueError(
                "d must be between 1 and the number of samples (%d), got %r"
                % (num_samples, self.d))

        # X^T X is symmetric, so eigh is the right solver: it guarantees real
        # eigenvalues and orthonormal eigenvectors (column v[:, i] belongs to
        # eigenvalue w[i]), unlike the general-purpose eig.
        gram_matrix = X.T.dot(X)
        eigen_values, eigen_vectors = np.linalg.eigh(gram_matrix)
        print("len of real eigen values %d" % len(eigen_values))

        # Indices of the d largest eigenvalues, largest first.
        leading = np.argsort(eigen_values)[::-1][:self.d]

        # If X^T X v = lambda v then X X^T (X v) = lambda (X v), so X v is an
        # eigenvector of the big covariance matrix. Only the d selected
        # vectors are projected, which avoids building all N of them.
        directions = X.dot(eigen_vectors[:, leading])
        self.max_eigen_vectors = directions.T.tolist()

        # Rescale every column to unit length. A column that is exactly zero
        # (possible when the matching eigenvalue is zero) is left as zeros
        # instead of being turned into NaN.
        norms = np.sqrt(np.sum(directions ** 2, axis=0, keepdims=True))
        norms[norms == 0] = 1.0
        self.W = directions / norms

    def normalize(self, X):
        """Centre X by subtracting the per-feature mean.

        Stores ``self.mean_vector`` and ``self.std`` as a side effect. The
        data is only centred; it is deliberately not divided by ``self.std``.

        Params:
            X: np.array with shape (n, N).

        Returns:
            The centred array, same shape as X.
        """
        self.mean_vector = np.mean(X, axis=1, keepdims=True)
        self.std = np.std(X, axis=1, keepdims=True) + 10e-8
        return X - self.mean_vector

    def dim_reduction(self, x):
        """Project x onto the principal components.

        Params:
            x: array with shape (n, 1). The caller is expected to have
               subtracted ``self.mean_vector`` already; this method does not.

        Returns:
            Array with shape (d, 1).
        """
        return self.W.T.dot(x)

    def construct(self, c):
        """Map a code c of shape (d, 1) back to feature space, shape (n, 1).

        The mean is not added back; the caller does that.
        """
        return self.W.dot(c)

def main():
    """Compress every Yale face image to d dimensions and save the reconstruction.

    Reads all non-``.txt`` files in the data directory, fits ``PCA`` on them
    (one flattened image per column), then projects each image to ``d``
    dimensions, reconstructs it and writes the result as a PNG into the
    ``yalefaces`` directory (created if it does not exist).
    """
    d = 20
    path = "../../数据/yalefaces/yalefaces"
    out_dir = 'yalefaces'
    # Suffixes treated as a plain image extension when building output names.
    image_suffixes = ('.gif', '.png', '.jpg', '.jpeg', '.bmp', '.pgm')

    # Decode every image exactly once, remembering its name and (h, w) so the
    # reconstruction loop below does not have to reopen the files.
    names = []
    shapes = []
    flat_images = []
    for name in sorted(os.listdir(path)):
        if name.endswith(".txt"):
            continue
        img = Image.open(os.path.join(path, name))
        names.append(name)
        shapes.append((img.height, img.width))
        flat_images.append(np.array(img).reshape(img.width * img.height))

    # PCA.train expects one sample per column: shape (n_pixels, n_images).
    # The result can be cross-checked against sklearn.decomposition.PCA
    # fitted on the transposed matrix (one sample per row).
    X = np.array(flat_images).T

    pca = PCA(d)
    pca.train(X)
    print("my pca W:  %s" % (pca.W,))

    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    for name, (h, w), pixels in zip(names, shapes, X.T):
        # Centre with the training mean before projecting, and add the mean
        # back after reconstructing; PCA itself does neither.
        centred = pixels.reshape(w * h, 1) - pca.mean_vector
        features = pca.dim_reduction(centred)
        x_hat = pca.construct(features) + pca.mean_vector

        # Round to the nearest grey level and clamp to the valid 8-bit range
        # instead of truncating towards zero.
        x_hat = np.clip(np.rint(x_hat), 0, 255).astype("uint8").reshape(h, w)
        new_img = Image.fromarray(x_hat).convert('L')

        # Keep the whole file name in the output name. Yale files are
        # typically named like "subject01.glasses" (assumption about the
        # dataset layout), so cutting at the first dot would map every image
        # of a subject to the same output file.
        stem, suffix = os.path.splitext(name)
        if suffix.lower() not in image_suffixes:
            stem = name
        new_file = str(d) + '_' + stem.replace('.', '_') + '.png'
        new_img.save(os.path.join(out_dir, new_file))


# Run the demo only when this file is executed as a script, not when imported.
if __name__=="__main__":
    main()

结果:
下面两张图分别是原图和降维之后的图,可以看出即使只有20维,保留的人脸信息也是比较充分的:
西瓜书《机器学习》课后答案——chapter10_10.6 PCA_第1张图片

你可能感兴趣的:(机器学习)