8-PCA降维实战

import numpy as np
import pandas as pd

# Load the raw iris measurements. The data file has no header row, so
# header=None keeps pandas from turning the first sample into column names
# (which silently dropped one observation from the data set).
# NOTE(review): the outputs recorded in this write-up were captured with the
# old call, i.e. with the first sample consumed as the header.
df = pd.read_csv('./data/iris.data', header=None)
print(df.head())
   5.1  3.5  1.4  0.2  Iris-setosa
0  4.9  3.0  1.4  0.2  Iris-setosa
1  4.7  3.2  1.3  0.2  Iris-setosa
2  4.6  3.1  1.5  0.2  Iris-setosa
3  5.0  3.6  1.4  0.2  Iris-setosa
4  5.4  3.9  1.7  0.4  Iris-setosa
# Give the columns descriptive names: four measurements followed by the
# species label.
column_names = [
    'sepal_len',
    'sepal_wid',
    'petal_len',
    'petal_wid',
    'class',
]
df.columns = column_names
print(df.head())
   sepal_len  sepal_wid  petal_len  petal_wid        class
0        4.9        3.0        1.4        0.2  Iris-setosa
1        4.7        3.2        1.3        0.2  Iris-setosa
2        4.6        3.1        1.5        0.2  Iris-setosa
3        5.0        3.6        1.4        0.2  Iris-setosa
4        5.4        3.9        1.7        0.4  Iris-setosa

将数据分为数据X和数据标签Y

# Split the frame into the feature matrix (first four columns) and the
# label vector (fifth column).
# The labels are bound to lowercase `y` because every boolean mask further
# down (`X[y==lab, ...]`, `Y[y==lab, ...]`) refers to `y`; binding them to
# `Y` left `y` undefined (NameError at the first plot), and `Y` is later
# assigned the PCA-projected data.
X = df.iloc[:, 0:4].values
y = df.iloc[:, 4].values
from matplotlib import pyplot as plt
import math

# Display names for the three species (not used by the plotting loop below,
# which labels the classes with the raw class strings).
label_dict = {1: 'Iris-Setosa',
              2: 'Iris-Versicolor',
              3: 'Iris-Virginica'}  # spelling fixed (was 'Iris-Virgnica')

# Axis label for each feature column of X, keyed by column index.
feature_dict = {0: 'sepal length [cm]',
                1: 'sepal width [cm]',
                2: 'petal length [cm]',
                3: 'petal width [cm]'}

# One histogram panel per feature in a 2x2 grid, with the three classes
# overlaid semi-transparently so their overlap is visible.
# `y` is expected to be the per-sample class-label array aligned with X.
plt.figure(figsize=(8, 6))
for cnt, axis_label in feature_dict.items():
    plt.subplot(2, 2, cnt + 1)
    for lab in ('Iris-setosa', 'Iris-versicolor', 'Iris-virginica'):
        # Boolean mask selects the rows of class `lab`; `cnt` picks the column.
        plt.hist(X[y == lab, cnt], label=lab, bins=10, alpha=0.3)
    plt.xlabel(axis_label)
    plt.legend(loc='upper right', fancybox=True, fontsize=8)

plt.tight_layout()
plt.show()

8-PCA降维实战_第1张图片

from sklearn.preprocessing import StandardScaler
# Standardize every feature column before PCA so that no single measurement
# dominates the covariance purely because of its scale.
scaler = StandardScaler()
X_std = scaler.fit_transform(X)
print(X_std)
[[-1.1483555  -0.11805969 -1.35396443 -1.32506301]
 [-1.3905423   0.34485856 -1.41098555 -1.32506301]
 [-1.51163569  0.11339944 -1.29694332 -1.32506301]
 [-1.02726211  1.27069504 -1.35396443 -1.32506301]
 [-0.54288852  1.9650724  -1.18290109 -1.0614657 ]
 [-1.51163569  0.8077768  -1.35396443 -1.19326436]
 [-1.02726211  0.8077768  -1.29694332 -1.32506301]
 [-1.75382249 -0.34951881 -1.35396443 -1.32506301]
 [-1.1483555   0.11339944 -1.29694332 -1.45686167]
 [-0.54288852  1.50215416 -1.29694332 -1.32506301]
 [-1.2694489   0.8077768  -1.23992221 -1.32506301]
 [-1.2694489  -0.11805969 -1.35396443 -1.45686167]
 [-1.87491588 -0.11805969 -1.52502777 -1.45686167]
 [-0.05851493  2.19653152 -1.46800666 -1.32506301]
 [-0.17960833  3.122368   -1.29694332 -1.0614657 ]
 [-0.54288852  1.9650724  -1.41098555 -1.0614657 ]
 [-0.90616871  1.03923592 -1.35396443 -1.19326436]
 [-0.17960833  1.73361328 -1.18290109 -1.19326436]
 [-0.90616871  1.73361328 -1.29694332 -1.19326436]
 [-0.54288852  0.8077768  -1.18290109 -1.32506301]
 [-0.90616871  1.50215416 -1.29694332 -1.0614657 ]
 [-1.51163569  1.27069504 -1.58204889 -1.32506301]
 [-0.90616871  0.57631768 -1.18290109 -0.92966704]
 [-1.2694489   0.8077768  -1.06885886 -1.32506301]
 [-1.02726211 -0.11805969 -1.23992221 -1.32506301]
 [-1.02726211  0.8077768  -1.23992221 -1.0614657 ]
 [-0.78507531  1.03923592 -1.29694332 -1.32506301]
 [-0.78507531  0.8077768  -1.35396443 -1.32506301]
 [-1.3905423   0.34485856 -1.23992221 -1.32506301]
 [-1.2694489   0.11339944 -1.23992221 -1.32506301]
 [-0.54288852  0.8077768  -1.29694332 -1.0614657 ]
 [-0.78507531  2.42799064 -1.29694332 -1.45686167]
 [-0.42179512  2.65944976 -1.35396443 -1.32506301]
 [-1.1483555   0.11339944 -1.29694332 -1.45686167]
 [-1.02726211  0.34485856 -1.46800666 -1.32506301]
 [-0.42179512  1.03923592 -1.41098555 -1.32506301]
 [-1.1483555   0.11339944 -1.29694332 -1.45686167]
 [-1.75382249 -0.11805969 -1.41098555 -1.32506301]
 [-0.90616871  0.8077768  -1.29694332 -1.32506301]
 [-1.02726211  1.03923592 -1.41098555 -1.19326436]
 [-1.63272909 -1.73827353 -1.41098555 -1.19326436]
 [-1.75382249  0.34485856 -1.41098555 -1.32506301]
 [-1.02726211  1.03923592 -1.23992221 -0.79786838]
 [-0.90616871  1.73361328 -1.06885886 -1.0614657 ]
 [-1.2694489  -0.11805969 -1.35396443 -1.19326436]
 [-0.90616871  1.73361328 -1.23992221 -1.32506301]
 [-1.51163569  0.34485856 -1.35396443 -1.32506301]
 [-0.66398191  1.50215416 -1.29694332 -1.32506301]
 [-1.02726211  0.57631768 -1.35396443 -1.32506301]
 [ 1.39460583  0.34485856  0.52773232  0.25652088]
 [ 0.66804545  0.34485856  0.41369009  0.38831953]
 [ 1.27351244  0.11339944  0.64177455  0.38831953]
 [-0.42179512 -1.73827353  0.12858453  0.12472222]
 [ 0.78913885 -0.58097793  0.47071121  0.38831953]
 [-0.17960833 -0.58097793  0.41369009  0.12472222]
 [ 0.54695205  0.57631768  0.52773232  0.52011819]
 [-1.1483555  -1.50681441 -0.27056327 -0.27067375]
 [ 0.91023225 -0.34951881  0.47071121  0.12472222]
 [-0.78507531 -0.81243705  0.07156341  0.25652088]
 [-1.02726211 -2.43265089 -0.15652104 -0.27067375]
 [ 0.06257847 -0.11805969  0.24262675  0.38831953]
 [ 0.18367186 -1.96973265  0.12858453 -0.27067375]
 [ 0.30476526 -0.34951881  0.52773232  0.25652088]
 [-0.30070172 -0.34951881 -0.09949993  0.12472222]
 [ 1.03132564  0.11339944  0.35666898  0.25652088]
 [-0.30070172 -0.11805969  0.41369009  0.38831953]
 [-0.05851493 -0.81243705  0.18560564 -0.27067375]
 [ 0.42585866 -1.96973265  0.41369009  0.38831953]
 [-0.30070172 -1.27535529  0.07156341 -0.1388751 ]
 [ 0.06257847  0.34485856  0.58475344  0.78371551]
 [ 0.30476526 -0.58097793  0.12858453  0.12472222]
 [ 0.54695205 -1.27535529  0.64177455  0.38831953]
 [ 0.30476526 -0.58097793  0.52773232 -0.00707644]
 [ 0.66804545 -0.34951881  0.29964787  0.12472222]
 [ 0.91023225 -0.11805969  0.35666898  0.25652088]
 [ 1.15241904 -0.58097793  0.58475344  0.25652088]
 [ 1.03132564 -0.11805969  0.69879566  0.65191685]
 [ 0.18367186 -0.34951881  0.41369009  0.38831953]
 [-0.17960833 -1.04389617 -0.15652104 -0.27067375]
 [-0.42179512 -1.50681441  0.0145423  -0.1388751 ]
 [-0.42179512 -1.50681441 -0.04247882 -0.27067375]
 [-0.05851493 -0.81243705  0.07156341 -0.00707644]
 [ 0.18367186 -0.81243705  0.75581678  0.52011819]
 [-0.54288852 -0.11805969  0.41369009  0.38831953]
 [ 0.18367186  0.8077768   0.41369009  0.52011819]
 [ 1.03132564  0.11339944  0.52773232  0.38831953]
 [ 0.54695205 -1.73827353  0.35666898  0.12472222]
 [-0.30070172 -0.11805969  0.18560564  0.12472222]
 [-0.42179512 -1.27535529  0.12858453  0.12472222]
 [-0.42179512 -1.04389617  0.35666898 -0.00707644]
 [ 0.30476526 -0.11805969  0.47071121  0.25652088]
 [-0.05851493 -1.04389617  0.12858453 -0.00707644]
 [-1.02726211 -1.73827353 -0.27056327 -0.27067375]
 [-0.30070172 -0.81243705  0.24262675  0.12472222]
 [-0.17960833 -0.11805969  0.24262675 -0.00707644]
 [-0.17960833 -0.34951881  0.24262675  0.12472222]
 [ 0.42585866 -0.34951881  0.29964787  0.12472222]
 [-0.90616871 -1.27535529 -0.44162661 -0.1388751 ]
 [-0.17960833 -0.58097793  0.18560564  0.12472222]
 [ 0.54695205  0.57631768  1.2690068   1.70630611]
 [-0.05851493 -0.81243705  0.75581678  0.91551417]
 [ 1.51569923 -0.11805969  1.21198569  1.17911148]
 [ 0.54695205 -0.34951881  1.04092235  0.78371551]
 [ 0.78913885 -0.11805969  1.15496457  1.31091014]
 [ 2.12116622 -0.11805969  1.61113348  1.17911148]
 [-1.1483555  -1.27535529  0.41369009  0.65191685]
 [ 1.75788602 -0.34951881  1.44007014  0.78371551]
 [ 1.03132564 -1.27535529  1.15496457  0.78371551]
 [ 1.63679263  1.27069504  1.32602791  1.70630611]
 [ 0.78913885  0.34485856  0.75581678  1.04731282]
 [ 0.66804545 -0.81243705  0.869859    0.91551417]
 [ 1.15241904 -0.11805969  0.98390123  1.17911148]
 [-0.17960833 -1.27535529  0.69879566  1.04731282]
 [-0.05851493 -0.58097793  0.75581678  1.57450745]
 [ 0.66804545  0.34485856  0.869859    1.4427088 ]
 [ 0.78913885 -0.11805969  0.98390123  0.78371551]
 [ 2.24225961  1.73361328  1.6681546   1.31091014]
 [ 2.24225961 -1.04389617  1.78219682  1.4427088 ]
 [ 0.18367186 -1.96973265  0.69879566  0.38831953]
 [ 1.27351244  0.34485856  1.09794346  1.4427088 ]
 [-0.30070172 -0.58097793  0.64177455  1.04731282]
 [ 2.24225961 -0.58097793  1.6681546   1.04731282]
 [ 0.54695205 -0.81243705  0.64177455  0.78371551]
 [ 1.03132564  0.57631768  1.09794346  1.17911148]
 [ 1.63679263  0.34485856  1.2690068   0.78371551]
 [ 0.42585866 -0.58097793  0.58475344  0.78371551]
 [ 0.30476526 -0.11805969  0.64177455  0.78371551]
 [ 0.66804545 -0.58097793  1.04092235  1.17911148]
 [ 1.63679263 -0.11805969  1.15496457  0.52011819]
 [ 1.87897942 -0.58097793  1.32602791  0.91551417]
 [ 2.48444641  1.73361328  1.49709126  1.04731282]
 [ 0.66804545 -0.58097793  1.04092235  1.31091014]
 [ 0.54695205 -0.58097793  0.75581678  0.38831953]
 [ 0.30476526 -1.04389617  1.04092235  0.25652088]
 [ 2.24225961 -0.11805969  1.32602791  1.4427088 ]
 [ 0.54695205  0.8077768   1.04092235  1.57450745]
 [ 0.66804545  0.11339944  0.98390123  0.78371551]
 [ 0.18367186 -0.11805969  0.58475344  0.78371551]
 [ 1.27351244  0.11339944  0.92688012  1.17911148]
 [ 1.03132564  0.11339944  1.04092235  1.57450745]
 [ 1.27351244  0.11339944  0.75581678  1.4427088 ]
 [-0.05851493 -0.81243705  0.75581678  0.91551417]
 [ 1.15241904  0.34485856  1.21198569  1.4427088 ]
 [ 1.03132564  0.57631768  1.09794346  1.70630611]
 [ 1.03132564 -0.11805969  0.81283789  1.4427088 ]
 [ 0.54695205 -1.27535529  0.69879566  0.91551417]
 [ 0.78913885 -0.11805969  0.81283789  1.04731282]
 [ 0.42585866  0.8077768   0.92688012  1.4427088 ]
 [ 0.06257847 -0.11805969  0.75581678  0.78371551]]

求解协方差矩阵

# Per-feature mean of the standardized data (column-wise, axis=0).
mean_vec = X_std.mean(axis=0)
print(mean_vec)
[ 2.38437160e-16  2.38437160e-17 -9.53748639e-17 -1.43062296e-16]
# Sample covariance matrix computed by hand: centre the data, take the
# cross-product of the centred columns, and divide by (n_samples - 1).
centered = X_std - mean_vec
n_samples = X_std.shape[0]
cov_mat = centered.T.dot(centered) / (n_samples - 1)
print('Covariance matrix \n%s' % cov_mat)
Covariance matrix 
[[ 1.00675676 -0.10448539  0.87716999  0.82249094]
 [-0.10448539  1.00675676 -0.41802325 -0.35310295]
 [ 0.87716999 -0.41802325  1.00675676  0.96881642]
 [ 0.82249094 -0.35310295  0.96881642  1.00675676]]
# Cross-check the hand-rolled result against NumPy's estimator.
# rowvar=False tells np.cov that each column (not each row) is a variable.
print('NumPy covariance matrix: \n%s' % np.cov(X_std, rowvar=False))
NumPy covariance matrix: 
[[ 1.00675676 -0.10448539  0.87716999  0.82249094]
 [-0.10448539  1.00675676 -0.41802325 -0.35310295]
 [ 0.87716999 -0.41802325  1.00675676  0.96881642]
 [ 0.82249094 -0.35310295  0.96881642  1.00675676]]

求解特征向量和特征值

# Eigen-decomposition of the feature covariance matrix.
# rowvar=False: columns of X_std are the variables.
cov_mat = np.cov(X_std, rowvar=False)
eigen_result = np.linalg.eig(cov_mat)
# Column i of eig_vecs is the eigenvector belonging to eig_vals[i].
eig_vals, eig_vecs = eigen_result

print('Eigenvectors \n%s' % eig_vecs)
print('Eigenvalues \n%s' % eig_vals)
Eigenvectors 
[[ 0.52308496 -0.36956962 -0.72154279  0.26301409]
 [-0.25956935 -0.92681168  0.2411952  -0.12437342]
 [ 0.58184289 -0.01912775  0.13962963 -0.80099722]
 [ 0.56609604 -0.06381646  0.63380158  0.52321917]]
Eigenvalues 
[2.92442837 0.93215233 0.14946373 0.02098259]
# Pair each eigenvalue magnitude with its eigenvector. Eigenvectors are the
# columns of eig_vecs, so iterating over the transpose yields them one by one.
eig_pairs = list(zip(np.abs(eig_vals), eig_vecs.T))
print(eig_pairs)
print('----------')

# Order the pairs from the largest eigenvalue to the smallest.
eig_pairs = sorted(eig_pairs, key=lambda pair: pair[0], reverse=True)

# Print the eigenvalues alone to confirm the descending order by eye.
print('Eigenvalues in descending order:')
for eig_val, _ in eig_pairs:
    print(eig_val)
[(2.9244283691111144, array([ 0.52308496, -0.25956935,  0.58184289,  0.56609604])), (0.932152330253508, array([-0.36956962, -0.92681168, -0.01912775, -0.06381646])), (0.14946373489813417, array([-0.72154279,  0.2411952 ,  0.13962963,  0.63380158])), (0.02098259276427019, array([ 0.26301409, -0.12437342, -0.80099722,  0.52321917]))]
----------
Eigenvalues in descending order:
2.9244283691111144
0.932152330253508
0.14946373489813417
0.02098259276427019
# Share of the total variance carried by each component, in percent,
# listed from the largest eigenvalue to the smallest.
tot = sum(eig_vals)
ranked_vals = sorted(eig_vals, reverse=True)
var_exp = [(val / tot) * 100 for val in ranked_vals]
print(var_exp)

# Running total of the percentages above.
cum_var_exp = np.cumsum(var_exp)
# Bare expression: echoes the array when run as a notebook cell.
cum_var_exp
[72.62003332692029, 23.147406858644157, 3.7115155645845395, 0.5210442498510046]





array([ 72.62003333,  95.76744019,  99.47895575, 100.        ])
# Bars show the variance explained by each component; the step line shows
# the cumulative total across components.
component_idx = range(4)

plt.figure(figsize=(6, 4))
plt.bar(
    component_idx,
    var_exp,
    alpha=0.5,
    align='center',
    label='individual explained variance',
)
plt.step(
    component_idx,
    cum_var_exp,
    where='mid',
    label='cumulative explained variance',
)
plt.ylabel('Explained variance ratio')
plt.xlabel('Principal components')
plt.legend(loc='best')
plt.tight_layout()
plt.show()

8-PCA降维实战_第2张图片

# Projection matrix W: the eigenvectors of the two largest eigenvalues
# (eig_pairs is sorted in descending order) placed side by side as columns.
# column_stack infers the vector length itself, so the construction is no
# longer tied to exactly four features the way reshape(4, 1) was.
matrix_w = np.column_stack((eig_pairs[0][1], eig_pairs[1][1]))
print('Matrix W:\n', matrix_w)
Matrix W:
 [[ 0.52308496 -0.36956962]
 [-0.25956935 -0.92681168]
 [ 0.58184289 -0.01912775]
 [ 0.56609604 -0.06381646]]
# Project the standardized samples onto the two selected eigenvectors,
# giving one 2-D point per sample.
Y = np.dot(X_std, matrix_w)
# Bare expression: echoes the projected data when run as a notebook cell.
Y
array([[-2.10795032,  0.64427554],
       [-2.38797131,  0.30583307],
       [-2.32487909,  0.56292316],
       [-2.40508635, -0.687591  ],
       [-2.08320351, -1.53025171],
       [-2.4636848 , -0.08795413],
       [-2.25174963, -0.25964365],
       [-2.3645813 ,  1.08255676],
       [-2.20946338,  0.43707676],
       [-2.17862017, -1.08221046],
       [-2.34525657, -0.17122946],
       [-2.24590315,  0.6974389 ],
       [-2.66214582,  0.92447316],
       [-2.2050227 , -1.90150522],
       [-2.25993023, -2.73492274],
       [-2.21591283, -1.52588897],
       [-2.20705382, -0.52623535],
       [-1.9077081 , -1.4415791 ],
       [-2.35411558, -1.17088308],
       [-1.93202643, -0.44083479],
       [-2.21942518, -0.96477499],
       [-2.79116421, -0.50421849],
       [-1.83814105, -0.11729122],
       [-2.24572458, -0.17450151],
       [-1.97825353,  0.59734172],
       [-2.06935091, -0.27755619],
       [-2.18514506, -0.56366755],
       [-2.15824269, -0.34805785],
       [-2.28843932,  0.30256102],
       [-2.16501749,  0.47232759],
       [-1.8491597 , -0.45547527],
       [-2.62023392, -1.84237072],
       [-2.44885384, -2.1984673 ],
       [-2.20946338,  0.43707676],
       [-2.23112223,  0.17266644],
       [-2.06147331, -0.6957435 ],
       [-2.20946338,  0.43707676],
       [-2.45783833,  0.86912843],
       [-2.1884075 , -0.30439609],
       [-2.30357329, -0.48039222],
       [-1.89932763,  2.31759817],
       [-2.57799771,  0.4400904 ],
       [-1.98020921, -0.50889705],
       [-2.14679556, -1.18365675],
       [-2.09668176,  0.68061705],
       [-2.39554894, -1.16356284],
       [-2.41813611,  0.34949483],
       [-2.24196231, -1.03745802],
       [-2.22484727, -0.04403395],
       [ 1.09225538, -0.86148748],
       [ 0.72045861, -0.59920238],
       [ 1.2299583 , -0.61280832],
       [ 0.37598859,  1.756516  ],
       [ 1.05729685,  0.21303055],
       [ 0.36816104,  0.58896262],
       [ 0.73800214, -0.77956125],
       [-0.52021731,  1.84337921],
       [ 0.9113379 , -0.02941906],
       [-0.01292322,  1.02537703],
       [-0.15020174,  2.65452146],
       [ 0.42437533,  0.05686991],
       [ 0.52894687,  1.77250558],
       [ 0.70241525,  0.18484154],
       [-0.05385675,  0.42901221],
       [ 0.86277668, -0.50943908],
       [ 0.33388091,  0.18785518],
       [ 0.13504146,  0.7883247 ],
       [ 1.19457128,  1.63549265],
       [ 0.13677262,  1.30063807],
       [ 0.72711201, -0.40394501],
       [ 0.45564294,  0.41540628],
       [ 1.21038365,  0.94282042],
       [ 0.61327355,  0.4161824 ],
       [ 0.68512164,  0.06335788],
       [ 0.85951424, -0.25016762],
       [ 1.23906722,  0.08500278],
       [ 1.34575245, -0.32669695],
       [ 0.64732915,  0.22336443],
       [-0.06728496,  1.05414028],
       [ 0.10033285,  1.56100021],
       [-0.00745518,  1.57050182],
       [ 0.2179082 ,  0.77368423],
       [ 1.04116321,  0.63744742],
       [ 0.20719664,  0.27736006],
       [ 0.42154138, -0.85764157],
       [ 1.03691937, -0.52112206],
       [ 1.015435  ,  1.39413373],
       [ 0.0519502 ,  0.20903977],
       [ 0.25582921,  1.32747797],
       [ 0.25384813,  1.11700714],
       [ 0.60915822, -0.02858679],
       [ 0.31116522,  0.98711256],
       [-0.39679548,  2.01314578],
       [ 0.26536661,  0.85150613],
       [ 0.07385897,  0.17160757],
       [ 0.20854936,  0.37771566],
       [ 0.55843737,  0.15286277],
       [-0.47853403,  1.53421644],
       [ 0.23545172,  0.59332536],
       [ 1.8408037 , -0.86943848],
       [ 1.13831104,  0.70171953],
       [ 2.19615974, -0.54916658],
       [ 1.42613827,  0.05187679],
       [ 1.8575403 , -0.28797217],
       [ 2.74511173, -0.78056359],
       [ 0.34010583,  1.5568955 ],
       [ 2.29180093, -0.40328242],
       [ 1.98618025,  0.72876171],
       [ 2.26382116, -1.91685818],
       [ 1.35591821, -0.69255356],
       [ 1.58471851,  0.43102351],
       [ 1.87342402, -0.41054652],
       [ 1.23656166,  1.16818977],
       [ 1.45128483,  0.4451459 ],
       [ 1.58276283, -0.67521526],
       [ 1.45956552, -0.25105642],
       [ 2.43560434, -2.55096977],
       [ 3.29752602,  0.01266612],
       [ 1.23377366,  1.71954411],
       [ 2.03218282, -0.90334021],
       [ 0.95980311,  0.57047585],
       [ 2.88717988, -0.38895776],
       [ 1.31405636,  0.48854962],
       [ 1.69619746, -1.01153249],
       [ 1.94868773, -0.99881497],
       [ 1.1574572 ,  0.31987373],
       [ 1.007133  , -0.06550254],
       [ 1.7733922 ,  0.19641059],
       [ 1.85327106, -0.55077372],
       [ 2.4234788 , -0.2397454 ],
       [ 2.31353522, -2.62038074],
       [ 1.84800289,  0.18799967],
       [ 1.09649923,  0.29708201],
       [ 1.1812503 ,  0.81858241],
       [ 2.79178861, -0.83668445],
       [ 1.57340399, -1.07118383],
       [ 1.33614369, -0.420823  ],
       [ 0.91061354, -0.01965942],
       [ 1.84350913, -0.66872729],
       [ 2.00701161, -0.60663655],
       [ 1.89319854, -0.68227708],
       [ 1.13831104,  0.70171953],
       [ 2.03519535, -0.86076914],
       [ 1.99464025, -1.04517619],
       [ 1.85977129, -0.37934387],
       [ 1.54200377,  0.90808604],
       [ 1.50925493, -0.26460621],
       [ 1.3690965 , -1.01583909],
       [ 0.94680339,  0.02182097]])
# Scatter of the first two original features, one colour per class, as a
# baseline to compare against the PCA projection.
# `y` is expected to be the per-sample class-label array aligned with X.
class_colors = (
    ('Iris-setosa', 'blue'),
    ('Iris-versicolor', 'red'),
    ('Iris-virginica', 'green'),
)

plt.figure(figsize=(6, 4))
for lab, col in class_colors:
    in_class = y == lab  # boolean mask selecting the rows of this class
    plt.scatter(X[in_class, 0], X[in_class, 1], label=lab, c=col)
plt.xlabel('sepal_len')
plt.ylabel('sepal_wid')
plt.legend(loc='best')
plt.tight_layout()
plt.show()

8-PCA降维实战_第3张图片

# Scatter of the samples in the 2-D space spanned by the first two
# principal components, one colour per class.
# `y` is expected to be the per-sample class-label array aligned with Y.
class_colors = (
    ('Iris-setosa', 'blue'),
    ('Iris-versicolor', 'red'),
    ('Iris-virginica', 'green'),
)

plt.figure(figsize=(6, 4))
for lab, col in class_colors:
    in_class = y == lab  # boolean mask selecting the rows of this class
    plt.scatter(Y[in_class, 0], Y[in_class, 1], label=lab, c=col)
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend(loc='lower center')
plt.tight_layout()
plt.show()

8-PCA降维实战_第4张图片


你可能感兴趣的:(机器学习实战)