PCA简单小实例(二维降一维)
import numpy as np
import matplotlib.pyplot as plt
# Load the comma-separated sample file; column 0 is drawn on the x axis and
# column 1 on the y axis.
data = np.genfromtxt("data.csv", delimiter=",")
x_data = data[:, 0]
y_data = data[:, 1]
plt.scatter(x_data, y_data)
# show must actually be called: a bare `plt.show` only references the
# function object and does nothing.
plt.show()
print(x_data.shape)
(100,)
def zeroMean(dataMat):
    """Subtract the per-column mean from ``dataMat``.

    Args:
        dataMat: Array-like whose mean is taken along axis 0 (one mean per
            column for 2-D input).

    Returns:
        A ``(newData, meanVal)`` tuple: ``newData`` is ``dataMat`` with the
        axis-0 mean subtracted, and ``meanVal`` is that mean, which is
        needed later to shift reconstructed data back.
    """
    meanVal = np.mean(dataMat, axis=0)
    newData = dataMat - meanVal
    return newData, meanVal
# Centre the samples, then compute the covariance matrix.
# rowvar=False tells np.cov that columns are the variables and rows are the
# observations.
newData, meanVal = zeroMean(data)
covMat = np.cov(newData, rowvar=False)
print(covMat)
[[ 94.99190951 125.62024804]
[125.62024804 277.49520751]]
# Eigen-decomposition of the covariance matrix.
# np.asmatrix is used instead of the np.mat alias, which NumPy 2.0 removed.
# It still hands np.linalg.eig an np.matrix, so eigVects comes back as an
# np.matrix and the later `*` products remain matrix products.
eigVals, eigVects = np.linalg.eig(np.asmatrix(covMat))
print(eigVals)
print(eigVects)
[ 30.97826888 341.50884814]
[[-0.89098665 -0.45402951]
[ 0.45402951 -0.89098665]]
# Indices that would sort the eigenvalues in ascending order.
eigValIndice = eigVals.argsort()
eigValIndice
array([0, 1], dtype=int64)
# Number of components to keep.
top = 1
# Walk the index array from its last entry backwards and keep the first
# `top` entries.
n_eigValIndice = eigValIndice[::-1][:top]
n_eigValIndice
array([1], dtype=int64)
# Pick the column(s) of eigVects at the selected indices. These columns are
# the projection basis used for the dimensionality reduction below.
n_eigVect = eigVects[:,n_eigValIndice]
n_eigVect
matrix([[-0.45402951],
[-0.89098665]])
# Project the centred samples onto the retained eigenvector(s).
# This is written as an explicit matrix product; with n_eigVect being an
# np.matrix, the `*` operator performs this same dot product.
lowDDataMat = np.dot(newData, n_eigVect)
lowDDataMat
matrix([[ 44.02694787],
[ 1.49722533],
[ 3.35564513],
[ 1.73205523],
[-17.84406034],
[ -7.68710859],
[ -7.6311404 ],
[ 16.4703207 ],
[ -1.92574891],
[ -0.35289859],
[ 17.26071108],
[-11.13030667],
[ 11.73358623],
[ -6.54975245],
[-11.27989566],
[ 10.70315359],
[ -7.11092921],
[-27.10646295],
[ 22.80011879],
[ 18.73367246],
[-17.75232043],
[-48.32951839],
[ 14.46601408],
[ 22.36856421],
[ -3.66790828],
[ -6.5367338 ],
[-24.03511973],
[-26.696945 ],
[ -4.62040617],
[-10.25663202],
[ 6.87141913],
[ 33.98731832],
[ -0.49795028],
[-25.59431857],
[ -5.08219623],
[ 7.07400117],
[-10.3589004 ],
[ 26.81843789],
[ 15.13930397],
[ 15.74751369],
[ 19.86509486],
[ 15.18178022],
[ -9.56831765],
[ 12.3873416 ],
[-44.24922602],
[ 31.5989945 ],
[ 20.60936247],
[-15.69205803],
[ 24.68142619],
[-25.71953206],
[ -1.70011545],
[ 8.82426933],
[ 10.18596708],
[ 9.92767378],
[ -1.59054221],
[ -1.35713786],
[ -2.23178334],
[-18.42859066],
[-10.81013855],
[ 15.48471562],
[ 13.37698622],
[ 2.01510615],
[-13.45929748],
[ 11.60631421],
[ 9.47911818],
[ 29.10215592],
[-14.62799906],
[-24.1314861 ],
[ -2.92304672],
[ 23.86122122],
[ 27.79335708],
[ 5.96631636],
[ 1.4680452 ],
[ 15.95209101],
[-37.79638897],
[-10.36346217],
[-20.58776205],
[ 21.05647364],
[ -9.181336 ],
[ 29.38968703],
[-10.26417212],
[-12.86474835],
[ 21.79735392],
[ -8.94685441],
[-27.45655642],
[ -8.10672783],
[ 1.83592538],
[ 2.27166878],
[ 1.30641927],
[ 15.10178496],
[ 12.23038321],
[-13.67232328],
[-46.52044765],
[-14.2546318 ],
[-31.2297683 ],
[ -8.32905498],
[ 0.42750425],
[-11.625534 ],
[ 6.16748059],
[ 27.99826548]])
# Map the low-dimensional coordinates back into the original space, then
# add back the mean that was subtracted before the projection.
back_projected = np.dot(lowDDataMat, n_eigVect.T)
reconMat = back_projected + meanVal
reconMat
matrix([[ 28.96880808, 33.50762783],
[ 48.27855698, 71.40104277],
[ 47.43477956, 69.74521555],
[ 48.17193728, 71.19181247],
[ 57.06007136, 88.63387007],
[ 52.44851558, 79.58416168],
[ 52.42310436, 79.53429476],
[ 41.4803299 , 58.06021471],
[ 49.83268829, 74.45086712],
[ 49.11856784, 73.04947849],
[ 41.12146934, 57.35598744],
[ 54.0118291 , 82.65200519],
[ 43.6309471 , 62.28058188],
[ 51.93212233, 78.57079254],
[ 54.07974691, 82.78528698],
[ 44.09879393, 63.19868361],
[ 52.18691313, 79.07079354],
[ 61.26547544, 96.88654713],
[ 38.6064148 , 52.42044913],
[ 40.45270141, 56.04359851],
[ 57.01841873, 88.55213104],
[ 70.90136881, 115.79600617],
[ 42.39034424, 59.84602515],
[ 38.80235331, 52.80495849],
[ 50.62368004, 76.00310786],
[ 51.92621148, 78.55919309],
[ 59.87099499, 94.15002133],
[ 61.07954221, 96.52167211],
[ 51.05614219, 76.85177076],
[ 53.61515503, 81.87357274],
[ 45.83851443, 66.61270785],
[ 33.52709612, 42.45280371],
[ 49.18442558, 73.1787176 ],
[ 60.57891727, 95.53924668],
[ 51.2658085 , 77.26321954],
[ 45.74653621, 66.43220996],
[ 53.66158789, 81.96469251],
[ 36.78197936, 48.84018045],
[ 42.08465077, 59.24613285],
[ 41.80850561, 58.70422611],
[ 39.93900226, 55.03551626],
[ 42.06536529, 59.20828708],
[ 53.30263999, 81.26029383],
[ 43.33412288, 61.69809458],
[ 69.04879568, 112.16052014],
[ 34.61146561, 44.58076835],
[ 39.6010828 , 54.37238375],
[ 56.08299881, 86.71646474],
[ 37.75224573, 50.74422935],
[ 60.63576789, 95.65081023],
[ 49.73024404, 74.24983072],
[ 44.95186282, 64.8727444 ],
[ 44.33361186, 63.65948988],
[ 44.45088464, 63.88962576],
[ 49.68049456, 74.15220243],
[ 49.57452209, 73.94424227],
[ 49.97163695, 74.72353971],
[ 57.32546537, 89.15467878],
[ 53.86646332, 82.36673967],
[ 41.92782368, 58.93837568],
[ 42.88479502, 60.81633443],
[ 48.04342381, 70.93961788],
[ 55.06925964, 84.7271049 ],
[ 43.68873236, 62.39397956],
[ 44.65454212, 64.28928282],
[ 35.745104 , 46.80541819],
[ 55.59988464, 85.76840241],
[ 59.91474817, 94.23588248],
[ 50.28549092, 75.33944616],
[ 38.12464298, 51.47502102],
[ 36.33933729, 47.97154048],
[ 46.24945779, 67.41914234],
[ 48.29180562, 71.42704188],
[ 41.71562146, 58.52195044],
[ 66.11901726, 106.41112849],
[ 53.66365907, 81.96875698],
[ 58.30579289, 91.07847166],
[ 39.39808114, 53.97401367],
[ 53.12693891, 80.91549835],
[ 35.61455639, 46.54923181],
[ 53.61857845, 81.88029087],
[ 54.79931679, 84.19736957],
[ 39.06169964, 53.31389924],
[ 53.02047735, 80.70657838],
[ 61.4244282 , 97.19847574],
[ 52.63903509, 79.95803681],
[ 48.12477717, 71.09926555],
[ 47.92693681, 70.711024 ],
[ 48.36518856, 71.57104842],
[ 42.1016855 , 59.27956179],
[ 43.40538662, 61.83794241],
[ 55.16597964, 84.91690805],
[ 70.07999731, 114.18414829],
[ 55.43036489, 85.43573717],
[ 63.13757772, 100.56035714],
[ 52.73997818, 80.15612733],
[ 48.76424192, 72.35414998],
[ 54.23667692, 83.09324613],
[ 46.1581233 , 67.23990769],
[ 36.24630282, 47.78896982]])
# Draw the original samples and the reconstructed samples on one figure.
data = np.genfromtxt("data.csv", delimiter=",")
x_data = data[:, 0]
y_data = data[:, 1]
plt.scatter(x_data, y_data)

# Reconstructed points in red. reconMat is converted to a plain ndarray
# once so that column slicing yields 1-D arrays for scatter.
recon_points = np.array(reconMat)
x_data = recon_points[:, 0]
y_data = recon_points[:, 1]
plt.scatter(x_data, y_data, c='r')

# The original had two bare `plt.show` references, which never called the
# function. show() is called once, after both scatters, so the two point
# sets stay overlaid on the same axes.
plt.show()