机器学习—主成分分析PCA(编程部分1)

PCA简单小实例(二维降一维)

import numpy as np
import matplotlib.pyplot as plt
# Load the two-column data set from the CSV file
data = np.genfromtxt("data.csv",delimiter=",")
#print(data)
x_data = data[:,0]         # first column
y_data = data[:,1]         # second column
plt.scatter(x_data,y_data)
# plt.show must be *called*; a bare `plt.show` only references the function
# and never displays the figure.
plt.show()
print(x_data.shape)        # number of samples
(100,)

机器学习—主成分分析PCA(编程部分1)_第1张图片

# 数据中心化
def zeroMean(dataMat):
    """Center the data by subtracting the per-column mean.

    Parameters
    ----------
    dataMat : array-like
        Data whose columns are averaged along axis 0.

    Returns
    -------
    tuple
        ``(centered, column_means)`` where ``column_means`` is the mean taken
        along axis 0 and ``centered`` is ``dataMat - column_means``.
    """
    # axis=0 collapses the rows, producing one mean per column (feature)
    column_means = np.mean(dataMat, axis=0)
    return dataMat - column_means, column_means
# Center the loaded data; meanVal keeps the per-column means for later reconstruction
newData,meanVal = zeroMean(data)
# np.cov computes the covariance matrix; rowvar=0 means each row is one sample
# (i.e. each column is a variable)
covMat = np.cov(newData,rowvar=0)
print(covMat)
#print(newData)
[[ 94.99190951 125.62024804]
 [125.62024804 277.49520751]]
# np.linalg.eig returns the eigenvalues and the eigenvectors of a square matrix;
# column i of eigVects is the eigenvector belonging to eigVals[i]
eigVals,eigVects = np.linalg.eig(np.mat(covMat))    # np.mat() wraps covMat as a numpy matrix
print(eigVals)
print(eigVects)
[ 30.97826888 341.50884814]
[[-0.89098665 -0.45402951]
 [ 0.45402951 -0.89098665]]
# Indices that would sort the eigenvalues in ascending order
eigValIndice = np.argsort(eigVals)
eigValIndice     # the values 0 and 1 are indices into eigVals: 0 -> 30.978, 1 -> 341.5088
array([0, 1], dtype=int64)
# Number of principal components to keep
top = 1
# e.g. to reduce 10 dimensions to 5, set top=5.
# Walk the ascending index list backwards from its end, taking `top` entries:
# the indices of the `top` largest eigenvalues, largest first.
n_eigValIndice = eigValIndice[-1:-(top+1):-1]
n_eigValIndice
array([1], dtype=int64)
# Eigenvectors (columns of eigVects) that belong to the `top` largest eigenvalues
n_eigVect = eigVects[:,n_eigValIndice]
n_eigVect
matrix([[-0.45402951],
        [-0.89098665]])
# Data in the low-dimensional feature space: project the centered data onto the
# selected eigenvectors (n_eigVect is an np.matrix, so `*` is a matrix product)
lowDDataMat = newData*n_eigVect
lowDDataMat
matrix([[ 44.02694787],
        [  1.49722533],
        [  3.35564513],
        [  1.73205523],
        [-17.84406034],
        [ -7.68710859],
        [ -7.6311404 ],
        [ 16.4703207 ],
        [ -1.92574891],
        [ -0.35289859],
        [ 17.26071108],
        [-11.13030667],
        [ 11.73358623],
        [ -6.54975245],
        [-11.27989566],
        [ 10.70315359],
        [ -7.11092921],
        [-27.10646295],
        [ 22.80011879],
        [ 18.73367246],
        [-17.75232043],
        [-48.32951839],
        [ 14.46601408],
        [ 22.36856421],
        [ -3.66790828],
        [ -6.5367338 ],
        [-24.03511973],
        [-26.696945  ],
        [ -4.62040617],
        [-10.25663202],
        [  6.87141913],
        [ 33.98731832],
        [ -0.49795028],
        [-25.59431857],
        [ -5.08219623],
        [  7.07400117],
        [-10.3589004 ],
        [ 26.81843789],
        [ 15.13930397],
        [ 15.74751369],
        [ 19.86509486],
        [ 15.18178022],
        [ -9.56831765],
        [ 12.3873416 ],
        [-44.24922602],
        [ 31.5989945 ],
        [ 20.60936247],
        [-15.69205803],
        [ 24.68142619],
        [-25.71953206],
        [ -1.70011545],
        [  8.82426933],
        [ 10.18596708],
        [  9.92767378],
        [ -1.59054221],
        [ -1.35713786],
        [ -2.23178334],
        [-18.42859066],
        [-10.81013855],
        [ 15.48471562],
        [ 13.37698622],
        [  2.01510615],
        [-13.45929748],
        [ 11.60631421],
        [  9.47911818],
        [ 29.10215592],
        [-14.62799906],
        [-24.1314861 ],
        [ -2.92304672],
        [ 23.86122122],
        [ 27.79335708],
        [  5.96631636],
        [  1.4680452 ],
        [ 15.95209101],
        [-37.79638897],
        [-10.36346217],
        [-20.58776205],
        [ 21.05647364],
        [ -9.181336  ],
        [ 29.38968703],
        [-10.26417212],
        [-12.86474835],
        [ 21.79735392],
        [ -8.94685441],
        [-27.45655642],
        [ -8.10672783],
        [  1.83592538],
        [  2.27166878],
        [  1.30641927],
        [ 15.10178496],
        [ 12.23038321],
        [-13.67232328],
        [-46.52044765],
        [-14.2546318 ],
        [-31.2297683 ],
        [ -8.32905498],
        [  0.42750425],
        [-11.625534  ],
        [  6.16748059],
        [ 27.99826548]])
# Reconstruct the data from the low-dimensional representation: map back through
# the transposed eigenvectors, then add the column means that were subtracted
reconMat = (lowDDataMat*n_eigVect.T) + meanVal
reconMat
matrix([[ 28.96880808,  33.50762783],
        [ 48.27855698,  71.40104277],
        [ 47.43477956,  69.74521555],
        [ 48.17193728,  71.19181247],
        [ 57.06007136,  88.63387007],
        [ 52.44851558,  79.58416168],
        [ 52.42310436,  79.53429476],
        [ 41.4803299 ,  58.06021471],
        [ 49.83268829,  74.45086712],
        [ 49.11856784,  73.04947849],
        [ 41.12146934,  57.35598744],
        [ 54.0118291 ,  82.65200519],
        [ 43.6309471 ,  62.28058188],
        [ 51.93212233,  78.57079254],
        [ 54.07974691,  82.78528698],
        [ 44.09879393,  63.19868361],
        [ 52.18691313,  79.07079354],
        [ 61.26547544,  96.88654713],
        [ 38.6064148 ,  52.42044913],
        [ 40.45270141,  56.04359851],
        [ 57.01841873,  88.55213104],
        [ 70.90136881, 115.79600617],
        [ 42.39034424,  59.84602515],
        [ 38.80235331,  52.80495849],
        [ 50.62368004,  76.00310786],
        [ 51.92621148,  78.55919309],
        [ 59.87099499,  94.15002133],
        [ 61.07954221,  96.52167211],
        [ 51.05614219,  76.85177076],
        [ 53.61515503,  81.87357274],
        [ 45.83851443,  66.61270785],
        [ 33.52709612,  42.45280371],
        [ 49.18442558,  73.1787176 ],
        [ 60.57891727,  95.53924668],
        [ 51.2658085 ,  77.26321954],
        [ 45.74653621,  66.43220996],
        [ 53.66158789,  81.96469251],
        [ 36.78197936,  48.84018045],
        [ 42.08465077,  59.24613285],
        [ 41.80850561,  58.70422611],
        [ 39.93900226,  55.03551626],
        [ 42.06536529,  59.20828708],
        [ 53.30263999,  81.26029383],
        [ 43.33412288,  61.69809458],
        [ 69.04879568, 112.16052014],
        [ 34.61146561,  44.58076835],
        [ 39.6010828 ,  54.37238375],
        [ 56.08299881,  86.71646474],
        [ 37.75224573,  50.74422935],
        [ 60.63576789,  95.65081023],
        [ 49.73024404,  74.24983072],
        [ 44.95186282,  64.8727444 ],
        [ 44.33361186,  63.65948988],
        [ 44.45088464,  63.88962576],
        [ 49.68049456,  74.15220243],
        [ 49.57452209,  73.94424227],
        [ 49.97163695,  74.72353971],
        [ 57.32546537,  89.15467878],
        [ 53.86646332,  82.36673967],
        [ 41.92782368,  58.93837568],
        [ 42.88479502,  60.81633443],
        [ 48.04342381,  70.93961788],
        [ 55.06925964,  84.7271049 ],
        [ 43.68873236,  62.39397956],
        [ 44.65454212,  64.28928282],
        [ 35.745104  ,  46.80541819],
        [ 55.59988464,  85.76840241],
        [ 59.91474817,  94.23588248],
        [ 50.28549092,  75.33944616],
        [ 38.12464298,  51.47502102],
        [ 36.33933729,  47.97154048],
        [ 46.24945779,  67.41914234],
        [ 48.29180562,  71.42704188],
        [ 41.71562146,  58.52195044],
        [ 66.11901726, 106.41112849],
        [ 53.66365907,  81.96875698],
        [ 58.30579289,  91.07847166],
        [ 39.39808114,  53.97401367],
        [ 53.12693891,  80.91549835],
        [ 35.61455639,  46.54923181],
        [ 53.61857845,  81.88029087],
        [ 54.79931679,  84.19736957],
        [ 39.06169964,  53.31389924],
        [ 53.02047735,  80.70657838],
        [ 61.4244282 ,  97.19847574],
        [ 52.63903509,  79.95803681],
        [ 48.12477717,  71.09926555],
        [ 47.92693681,  70.711024  ],
        [ 48.36518856,  71.57104842],
        [ 42.1016855 ,  59.27956179],
        [ 43.40538662,  61.83794241],
        [ 55.16597964,  84.91690805],
        [ 70.07999731, 114.18414829],
        [ 55.43036489,  85.43573717],
        [ 63.13757772, 100.56035714],
        [ 52.73997818,  80.15612733],
        [ 48.76424192,  72.35414998],
        [ 54.23667692,  83.09324613],
        [ 46.1581233 ,  67.23990769],
        [ 36.24630282,  47.78896982]])
# Load the original data again and draw it
data = np.genfromtxt("data.csv",delimiter=",")
x_data = data[:,0]
y_data = data[:,1]
plt.scatter(x_data,y_data)

# Reconstructed data, drawn in red on the same axes as the original points
reconArr = np.array(reconMat)   # convert the np.matrix once so column slices are 1-D
x_data = reconArr[:,0]
y_data = reconArr[:,1]
plt.scatter(x_data,y_data,c='r')
# The original wrote a bare `plt.show` twice, which only references the function
# and never displays anything. Calling it once at the end keeps both scatter
# plots overlaid in a single figure.
plt.show()

机器学习—主成分分析PCA(编程部分1)_第2张图片

你可能感兴趣的:(机器学习—主成分分析PCA(编程部分1))