统计学习方法第十六章作业:PCA主成分分析算法 代码实现

PCA主成分分析

import numpy as np
class PCA:
    """Principal component analysis of standardized sample data.

    The input ``x`` is a 2-D array-like with one sample per row and one
    variable per column.  ``fit`` standardizes the data, obtains the
    principal directions either from the eigen-decomposition of the sample
    correlation matrix (``way='R'``) or from the SVD of the scaled data
    matrix (``way='SVD'``), and returns the sample scores.

    Attributes set by ``fit``:
        X:      standardized data, shape (num, dim).
        R:      sample correlation matrix (only refreshed when way='R').
        X_:     ``X / sqrt(num - 1)`` (only refreshed when way='SVD').
        r_list: eigenvalues of R for the kept components, descending.
        r_v:    matching unit eigenvectors, one per row.
        nk:     variance contribution ratio of each kept component.
    """

    def __init__(self, x, R=None):
        self.x = np.array(x)
        self.dim = self.x.shape[-1]   # number of variables (columns)
        self.num = self.x.shape[0]    # number of samples (rows)
        # NOTE: a user supplied R is stored here but is overwritten by
        # get_R() as soon as fit(way='R') runs.
        self.R = R
        self.X = None
        self.r_list = None
        self.r_v = None
        self.nk = None
        self.X_ = None

    def standetlize(self):
        """Standardize every column of ``self.x`` into ``self.X``.

        Each column is centered and divided by its sample standard
        deviation (``ddof=1``).  Raises ``ZeroDivisionError`` when there is
        only one sample, because the ``num - 1`` divisor is zero.
        """
        centered = self.x - np.mean(self.x, axis=0)
        variance = 1 / (self.num - 1) * np.sum(centered ** 2, axis=0)
        # Divide by the standard deviation, not by the variance; otherwise
        # the columns are not unit-variance and R is not a correlation matrix.
        std = np.sqrt(variance)
        # A constant column has zero spread; scale it by 1 so it becomes a
        # column of zeros instead of NaN (0/0).
        std = np.where(std == 0, 1.0, std)
        self.X = centered / std

    def get_R(self):
        """Store the sample correlation matrix of ``self.X`` in ``self.R``."""
        self.R = 1 / (self.num - 1) * (self.X.T.dot(self.X))

    @staticmethod
    def _sorted_eig(sym):
        """Eigen-decompose a symmetric matrix, largest eigenvalue first.

        Returns ``(values, vectors)`` where ``vectors[i]`` is the unit
        eigenvector (as a row) that belongs to ``values[i]``.
        """
        # eigh is the routine for symmetric matrices: real eigenvalues and
        # orthonormal eigenvectors.
        values, vectors = np.linalg.eigh(sym)
        order = np.argsort(-values)
        return values[order], vectors.T[order]

    def get_r_rv(self, x):
        """Return eigenvalues/eigenvectors of ``x^T x`` in descending order.

        ``x`` is treated as a data matrix; the decomposition is done on the
        product ``x.T.dot(x)``.  Eigenvectors are returned as rows.
        """
        x = np.array(x)
        return self._sorted_eig(x.T.dot(x))

    def fit(self, k=None, sup=None, way='R'):
        """Standardize the data and project it on the principal components.

        Args:
            k:   number of components to keep.  ``None`` keeps all of them.
            sup: cumulative variance contribution threshold.  When given
                 (and truthy), ``k`` is replaced by the smallest number of
                 components whose cumulative contribution exceeds ``sup``;
                 if the threshold is never exceeded, ``k`` is left as is.
            way: ``'R'`` for eigen-decomposition of the correlation matrix,
                 ``'SVD'`` for singular value decomposition of
                 ``X / sqrt(num - 1)``.

        Returns:
            Array of shape (num, k): one row of component scores per sample.

        Raises:
            ValueError: if ``way`` is neither ``'R'`` nor ``'SVD'``.
        """
        if way not in ('R', 'SVD'):
            raise ValueError("way must be 'R' or 'SVD', got %r" % (way,))

        self.standetlize()
        if way == 'R':
            self.get_R()
            # Decompose R itself.  Feeding R to get_r_rv would decompose
            # R^T R = R^2 and yield squared eigenvalues.
            r_list, r_v = self._sorted_eig(self.R)
            # Round-off can push a zero eigenvalue slightly below zero.
            r_list = np.clip(r_list, 0.0, None)
        else:
            self.X_ = 1 / np.sqrt(self.num - 1) * self.X
            # numpy returns V^T: the principal directions are its ROWS, and
            # the squared singular values are the eigenvalues of R.
            _, s, vh = np.linalg.svd(self.X_, full_matrices=False)
            r_list, r_v = s ** 2, vh

        nk = r_list / np.sum(r_list)
        if sup:
            reached = np.nonzero(np.cumsum(nk) > sup)[0]
            if reached.size:
                k = int(reached[0]) + 1
        if k is None:
            k = len(r_list)

        self.r_v = r_v[:k]
        self.r_list = r_list[:k]
        self.nk = nk[:k]
        # Row i of the result holds the scores of sample i.
        return self.X.dot(self.r_v.T)

    def get_rv(self):
        """Return the kept eigenvectors (one per row) from the last fit."""
        return self.r_v

    def get_r_list(self):
        """Return the kept eigenvalues from the last fit."""
        return self.r_list

    def get_nk(self):
        """Return the variance contribution ratios of the kept components."""
        return self.nk

    def get_factor_loading(self):
        """Return ``sqrt(eigenvalue) * eigenvector`` for each kept component."""
        return [np.sqrt(lam) * vec for lam, vec in zip(self.r_list, self.r_v)]

    def get_vi(self):
        """Return, per variable, the sum of squared factor loadings."""
        return np.sum([load ** 2 for load in self.get_factor_loading()], 0)

def main():
    """Demo: run PCA on a 6-sample, 3-variable data set and print results.

    Prints, in order: the scores from the correlation-matrix fit with a 0.7
    cumulative contribution threshold, the factor loadings and per-variable
    squared-loading sums of that fit, and the scores from a 2-component
    SVD fit.
    """
    samples = [
        [2, 9, 2],
        [3, 4, 5],
        [3, 5, 2],
        [4, 5, 9],
        [5, 6, 1],
        [7, 8, 0],
    ]
    model = PCA(samples)

    scores_by_threshold = model.fit(sup=0.7)
    print(scores_by_threshold)

    loadings = model.get_factor_loading()
    print(loadings)

    squared_loading_sums = model.get_vi()
    print(squared_loading_sums)

    scores_by_svd = model.fit(k=2, way="SVD")
    print(scores_by_svd)

# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

#----------result----------------
/usr/bin/python3 /Users/zhengyanzhao/PycharmProjects/tongjixuexi/shixian2/PVC.py
[[ 0.01436405 -0.98056186]
 [-0.62392994  0.24417043]
 [-0.37825673  0.00160078]
 [-0.35639953  0.32072976]
 [ 0.27000345  0.20323966]
 [ 1.0742187   0.21082123]]
[array([ 0.27913238,  0.22163074, -0.1237391 ]), array([ 0.15944099, -0.1768521 ,  0.04290707])]
[0.10333632 0.08039685 0.01715238]
[[ 0.01887854 -0.95392328]
 [-0.58760185  0.29494971]
 [-0.44973126  0.00587801]
 [-0.13200766  0.41115041]
 [ 0.17525954  0.14729349]
 [ 0.97520268  0.09465167]]

你可能感兴趣的:(统计学习方法,算法,python,机器学习)