数据挖掘-数据降维 Python实现

PCA

# -*-coding:utf-8-*-

"""
    Author: Thinkgamer
    Desc:
        代码4-3  PCA降维
"""
import numpy as np
from sklearn import datasets

class PCATest:
    """Step-by-step principal component analysis (PCA) built on NumPy.

    Each method performs one stage of the pipeline so the intermediate
    results (centered data, covariance matrix, eigen-decomposition,
    projection matrix) can be inspected individually.
    """

    def __init__(self):
        pass

    def loadIris(self):
        """Return the feature matrix (the ``"data"`` entry) of the iris dataset.

        The data is returned exactly as loaded; no centering or scaling is
        applied here -- call :meth:`Standard` for that.
        """
        return datasets.load_iris()["data"]

    def Standard(self, data):
        """Mean-center ``data`` column by column.

        Despite the method name, the data is only centered; it is NOT
        divided by the standard deviation.

        Args:
            data: 2-D array with one sample per row and one feature per column.

        Returns:
            A tuple ``(mean_vector, centered)`` where ``mean_vector`` holds
            the per-column means and ``centered`` is ``data - mean_vector``.
        """
        # axis=0 averages down each column, i.e. one mean per feature.
        mean_vector = np.mean(data, axis=0)
        return mean_vector, data - mean_vector

    def getCovMatrix(self, newData):
        """Return the covariance matrix of the features in ``newData``.

        Args:
            newData: 2-D array with one sample per row and one feature per
                column.
        """
        # rowvar=False: every column is a variable, every row an observation.
        return np.cov(newData, rowvar=False)

    def getFValueAndFVector(self, covMatrix):
        """Return the eigenvalues and eigenvectors of ``covMatrix``.

        Returns:
            A tuple ``(fValue, fVector)``; column ``fVector[:, i]`` is the
            eigenvector belonging to ``fValue[i]``. ``np.linalg.eig`` does
            not promise any ordering of the eigenvalues, so sorting is left
            to :meth:`getVectorMatrix`.
        """
        fValue, fVector = np.linalg.eig(covMatrix)
        return fValue, fVector

    def getVectorMatrix(self, fValue, fVector, k):
        """Select the eigenvectors of the ``k`` largest eigenvalues.

        Args:
            fValue: 1-D array of eigenvalues.
            fVector: 2-D array whose columns are the matching eigenvectors.
            k: Number of components to keep. If ``k`` exceeds the number of
                eigenvalues, all eigenvectors are returned.

        Returns:
            A 2-D array whose columns are the selected eigenvectors, ordered
            by decreasing eigenvalue.

        Raises:
            ValueError: If ``k`` is negative. Without this check the reversed
                slice below would silently return ``len(fValue) - |k|``
                columns instead of failing.
        """
        if k < 0:
            raise ValueError("k must be non-negative, got {}".format(k))
        # argsort is ascending; walking the index array backwards for k steps
        # yields the positions of the k largest eigenvalues, largest first.
        ascending = np.argsort(fValue)
        topIndices = ascending[:-(k + 1):-1]
        return fVector[:, topIndices]

    def getResult(self, data, vectorMatrix):
        """Project ``data`` onto the columns of ``vectorMatrix``.

        Args:
            data: 2-D array of (centered) samples, one per row.
            vectorMatrix: Projection matrix as returned by
                :meth:`getVectorMatrix`.

        Returns:
            The matrix product ``data . vectorMatrix``.
        """
        return np.dot(data, vectorMatrix)

if __name__ == "__main__":
    # Demo: reduce the iris features to k=2 principal components, then map
    # the reduced data back to the original feature space.
    pcatest = PCATest()
    # Load the iris feature matrix.
    data = pcatest.loadIris()
    # Center the data (subtract the per-feature mean; no scaling is applied).
    mean_vector, newData = pcatest.Standard(data)
    # Covariance matrix of the centered features.
    covMatrix = pcatest.getCovMatrix(newData)
    print("协方差矩阵为:\n {}".format(covMatrix))
    # Eigenvalues and eigenvectors of the covariance matrix.
    fValue, fVector = pcatest.getFValueAndFVector(covMatrix)
    print("特征值为:{}".format(fValue))
    print("特征向量为:\n{}".format(fVector))
    # Projection matrix built from the eigenvectors of the k largest eigenvalues.
    vectorMatrix = pcatest.getVectorMatrix(fValue, fVector, k=2)
    print("K维特征向量矩阵为:\n{}".format(vectorMatrix))
    # Project the centered data onto the selected components.
    result = pcatest.getResult(newData, vectorMatrix)
    print("最终降维结果为:\n{}".format(result))
    # Reconstruct an approximation of the original data: project back and
    # re-add the mean. np.dot is used instead of np.mat, which was removed
    # in NumPy 2.0; the printed text is the same for a 2-D ndarray.
    reconstructed = np.dot(result, vectorMatrix.T) + mean_vector
    print("最终重构结果为:\n{}".format(reconstructed))

结果

协方差矩阵为:
 [[ 0.68569351 -0.042434    1.27431544  0.51627069]
 [-0.042434    0.18997942 -0.32965638 -0.12163937]
 [ 1.27431544 -0.32965638  3.11627785  1.2956094 ]
 [ 0.51627069 -0.12163937  1.2956094   0.58100626]]
特征值为:[4.22824171 0.24267075 0.0782095  0.02383509]
特征向量为:
[[ 0.36138659 -0.65658877 -0.58202985  0.31548719]
 [-0.08452251 -0.73016143  0.59791083 -0.3197231 ]
 [ 0.85667061  0.17337266  0.07623608 -0.47983899]
 [ 0.3582892   0.07548102  0.54583143  0.75365743]]
K维特征向量矩阵为:
[[ 0.36138659 -0.65658877]
 [-0.08452251 -0.73016143]
 [ 0.85667061  0.17337266]
 [ 0.3582892   0.07548102]]
最终降维结果为:
[[-2.68412563 -0.31939725]
 [-2.71414169  0.17700123]
 [-2.88899057  0.14494943]
 [-2.74534286  0.31829898]
 [-2.72871654 -0.32675451]
 [-2.28085963 -0.74133045]
 [-2.82053775  0.08946138]
 [-2.62614497 -0.16338496]
 [-2.88638273  0.57831175]
 [-2.6727558   0.11377425]
 [-2.50694709 -0.6450689 ]
 [-2.61275523 -0.01472994]
 [-2.78610927  0.235112  ]
 [-3.22380374  0.51139459]
 [-2.64475039 -1.17876464]
 [-2.38603903 -1.33806233]
 [-2.62352788 -0.81067951]
 [-2.64829671 -0.31184914]
 [-2.19982032 -0.87283904]
 [-2.5879864  -0.51356031]
 [-2.31025622 -0.39134594]
 [-2.54370523 -0.43299606]
 [-3.21593942 -0.13346807]
 [-2.30273318 -0.09870885]
 [-2.35575405  0.03728186]
 [-2.50666891  0.14601688]
 [-2.46882007 -0.13095149]
 [-2.56231991 -0.36771886]
 [-2.63953472 -0.31203998]
 [-2.63198939  0.19696122]
 [-2.58739848  0.20431849]
 [-2.4099325  -0.41092426]
 [-2.64886233 -0.81336382]
 [-2.59873675 -1.09314576]
 [-2.63692688  0.12132235]
 [-2.86624165 -0.06936447]
 [-2.62523805 -0.59937002]
 [-2.80068412 -0.26864374]
 [-2.98050204  0.48795834]
 [-2.59000631 -0.22904384]
 [-2.77010243 -0.26352753]
 [-2.84936871  0.94096057]
 [-2.99740655  0.34192606]
 [-2.40561449 -0.18887143]
 [-2.20948924 -0.43666314]
 [-2.71445143  0.2502082 ]
 [-2.53814826 -0.50377114]
 [-2.83946217  0.22794557]
 [-2.54308575 -0.57941002]
 [-2.70335978 -0.10770608]
 [ 1.28482569 -0.68516047]
 [ 0.93248853 -0.31833364]
 [ 1.46430232 -0.50426282]
 [ 0.18331772  0.82795901]
 [ 1.08810326 -0.07459068]
 [ 0.64166908  0.41824687]
 [ 1.09506066 -0.28346827]
 [-0.74912267  1.00489096]
 [ 1.04413183 -0.2283619 ]
 [-0.0087454   0.72308191]
 [-0.50784088  1.26597119]
 [ 0.51169856  0.10398124]
 [ 0.26497651  0.55003646]
 [ 0.98493451  0.12481785]
 [-0.17392537  0.25485421]
 [ 0.92786078 -0.46717949]
 [ 0.66028376  0.35296967]
 [ 0.23610499  0.33361077]
 [ 0.94473373  0.54314555]
 [ 0.04522698  0.58383438]
 [ 1.11628318  0.08461685]
 [ 0.35788842  0.06892503]
 [ 1.29818388  0.32778731]
 [ 0.92172892  0.18273779]
 [ 0.71485333 -0.14905594]
 [ 0.90017437 -0.32850447]
 [ 1.33202444 -0.24444088]
 [ 1.55780216 -0.26749545]
 [ 0.81329065  0.1633503 ]
 [-0.30558378  0.36826219]
 [-0.06812649  0.70517213]
 [-0.18962247  0.68028676]
 [ 0.13642871  0.31403244]
 [ 1.38002644  0.42095429]
 [ 0.58800644  0.48428742]
 [ 0.80685831 -0.19418231]
 [ 1.22069088 -0.40761959]
 [ 0.81509524  0.37203706]
 [ 0.24595768  0.2685244 ]
 [ 0.16641322  0.68192672]
 [ 0.46480029  0.67071154]
 [ 0.8908152   0.03446444]
 [ 0.23054802  0.40438585]
 [-0.70453176  1.01224823]
 [ 0.35698149  0.50491009]
 [ 0.33193448  0.21265468]
 [ 0.37621565  0.29321893]
 [ 0.64257601 -0.01773819]
 [-0.90646986  0.75609337]
 [ 0.29900084  0.34889781]
 [ 2.53119273  0.00984911]
 [ 1.41523588  0.57491635]
 [ 2.61667602 -0.34390315]
 [ 1.97153105  0.1797279 ]
 [ 2.35000592  0.04026095]
 [ 3.39703874 -0.55083667]
 [ 0.52123224  1.19275873]
 [ 2.93258707 -0.3555    ]
 [ 2.32122882  0.2438315 ]
 [ 2.91675097 -0.78279195]
 [ 1.66177415 -0.24222841]
 [ 1.80340195  0.21563762]
 [ 2.1655918  -0.21627559]
 [ 1.34616358  0.77681835]
 [ 1.58592822  0.53964071]
 [ 1.90445637 -0.11925069]
 [ 1.94968906 -0.04194326]
 [ 3.48705536 -1.17573933]
 [ 3.79564542 -0.25732297]
 [ 1.30079171  0.76114964]
 [ 2.42781791 -0.37819601]
 [ 1.19900111  0.60609153]
 [ 3.49992004 -0.4606741 ]
 [ 1.38876613  0.20439933]
 [ 2.2754305  -0.33499061]
 [ 2.61409047 -0.56090136]
 [ 1.25850816  0.17970479]
 [ 1.29113206  0.11666865]
 [ 2.12360872  0.20972948]
 [ 2.38800302 -0.4646398 ]
 [ 2.84167278 -0.37526917]
 [ 3.23067366 -1.37416509]
 [ 2.15943764  0.21727758]
 [ 1.44416124  0.14341341]
 [ 1.78129481  0.49990168]
 [ 3.07649993 -0.68808568]
 [ 2.14424331 -0.1400642 ]
 [ 1.90509815 -0.04930053]
 [ 1.16932634  0.16499026]
 [ 2.10761114 -0.37228787]
 [ 2.31415471 -0.18365128]
 [ 1.9222678  -0.40920347]
 [ 1.41523588  0.57491635]
 [ 2.56301338 -0.2778626 ]
 [ 2.41874618 -0.3047982 ]
 [ 1.94410979 -0.1875323 ]
 [ 1.52716661  0.37531698]
 [ 1.76434572 -0.07885885]
 [ 1.90094161 -0.11662796]
 [ 1.39018886  0.28266094]]
最终重构结果为:
[[5.08303897 3.51741393 1.40321372 0.21353169]
 [4.7462619  3.15749994 1.46356177 0.24024592]
 [4.70411871 3.1956816  1.30821697 0.17518015]
 [4.6422117  3.05696697 1.46132981 0.23973218]
 [5.07175511 3.52655486 1.36373845 0.19699991]
 [5.50581049 3.79140823 1.67552816 0.32616959]
 [4.76528947 3.23041102 1.35723837 0.19551776]
 [5.00155648 3.39859911 1.47993231 0.2460815 ]
 [4.42052031 2.87903672 1.3855842  0.20882514]
 [4.80273233 3.20016781 1.48805402 0.2503016 ]
 [5.36090126 3.74023124 1.4985348  0.25243081]
 [4.90879014 3.28892521 1.51717562 0.26209953]
 [4.6820989  3.12115258 1.41198408 0.21884697]
 [4.34251794 2.95641673 1.08492393 0.08287986]
 [5.66151963 4.14156276 1.28795452 0.16277348]
 [5.85960752 4.23600886 1.48196707 0.24344301]
 [5.4275086  3.87100742 1.36995112 0.19816072]
 [5.09103106 3.50887425 1.43521594 0.22693854]
 [5.62144408 3.88058108 1.72215216 0.34527869]
 [5.24526768 3.65105838 1.4519108  0.23332171]
 [5.26539106 3.53834771 1.71102272 0.3420543 ]
 [5.20837272 3.58849072 1.50381282 0.25526824]
 [4.76876958 3.42660585 0.97985952 0.03702268]
 [5.07596756 3.32403953 1.76820275 0.36683827]
 [4.96751656 3.22922581 1.74635841 0.35810618]
 [4.84158376 3.1625874  1.63592576 0.31224245]
 [5.03711614 3.36161994 1.620331   0.30489742]
 [5.15878535 3.54240118 1.49918346 0.253526  ]
 [5.09432283 3.508273   1.44268899 0.23006346]
 [4.76284513 3.1359822  1.53739975 0.2711868 ]
 [4.77412899 3.12684127 1.57687502 0.28771858]
 [5.2422243  3.56106794 1.62223863 0.30486357]
 [5.42011555 3.87510873 1.34778245 0.18888104]
 [5.62193195 4.07515797 1.34221702 0.18572227]
 [4.81072442 3.19162812 1.52005623 0.26370845]
 [4.85305596 3.35024255 1.29054912 0.16715421]
 [5.28814713 3.71686193 1.40512135 0.21349784]
 [5.00759211 3.49020749 1.31216076 0.17560097]
 [4.44583189 2.95296449 1.28929015 0.16828324]
 [5.05772739 3.44348615 1.49950778 0.25407359]
 [5.01528468 3.483887   1.33924621 0.18694423]
 [4.19578554 2.61111602 1.48016643 0.24945997]
 [4.53560599 3.06102045 1.24949055 0.15120388]
 [5.09798737 3.39856855 1.66443564 0.32317144]
 [5.33156166 3.5629195  1.78949006 0.37473743]
 [4.69808309 3.10407321 1.47598851 0.24566068]
 [5.25685106 3.63969827 1.49630285 0.25191707]
 [4.66752328 3.13089475 1.36503575 0.19919028]
 [5.30473036 3.69534419 1.47895933 0.24443872]
 [4.93709396 3.36447093 1.42343785 0.22261896]
 [6.75752078 3.44901439 4.73988431 1.60795589]
 [6.38933648 3.210952   4.50164517 1.50940575]
 [6.70360586 3.30176008 4.92499937 1.68591477]
 [5.36595331 2.43729512 4.05858836 1.32750928]
 [6.28553466 3.01982734 4.67721409 1.5835588 ]
 [5.80060774 2.69771011 4.38021162 1.46080613]
 [6.42519566 3.17175365 4.64696063 1.57028526]
 [4.91281032 2.38691844 3.29046925 1.00678097]
 [6.37060843 3.13582174 4.61288533 1.5561975 ]
 [5.3654054  2.530106   3.8758707  1.25077891]
 [4.82858398 2.17589398 3.54243244 1.11293623]
 [5.95998142 2.9381602  4.21438462 1.39051801]
 [5.57794452 2.63332144 4.08035887 1.33578887]
 [6.11732146 2.88294701 4.62340445 1.56164611]
 [5.61314462 2.88594923 3.653188   1.15625441]
 [6.48539459 3.32002466 4.4718749  1.49651264]
 [5.85019511 2.74379965 4.38484098 1.46254838]
 [5.70961343 2.79378743 4.01810319 1.30910848]
 [5.82812417 2.58089813 4.66149221 1.5788184 ]
 [5.47633866 2.62721729 3.89796554 1.25960608]
 [6.19118463 2.90119831 4.72895723 1.6056725 ]
 [5.92741401 2.9767573  4.0765422  1.33276342]
 [6.09725811 2.70826992 4.92694533 1.68920031]
 [6.05645022 2.8459984  4.57929981 1.54337208]
 [6.1995402  3.10574704 4.34455161 1.44420666]
 [6.38433663 3.22110963 4.47219923 1.49706023]
 [6.48520624 3.12322858 4.85672682 1.65813265]
 [6.58193665 3.12097884 5.04614692 1.73728619]
 [6.0299917  2.86931987 4.48304267 1.50305643]
 [5.49110263 2.81427119 3.56006196 1.11764277]
 [5.35570523 2.54820206 3.82189561 1.22815146]
 [5.32813766 2.57664154 3.71349913 1.18274239]
 [5.68644667 2.81650766 3.92931911 1.27191776]
 [6.06566253 2.63332544 5.01321005 1.72555596]
 [5.7378533  2.65402555 4.34569004 1.4465642 ]
 [6.26241904 3.13092008 4.41554589 1.47376487]
 [6.5521131  3.25178558 4.7330599  1.60592615]
 [5.89362247 2.71679232 4.52076919 1.51945489]
 [5.75590904 2.84047821 4.0152595  1.30772581]
 [5.45572741 2.54535107 4.01878876 1.31042992]
 [5.57092426 2.52831954 4.27246379 1.41649225]
 [6.14263303 2.95687479 4.52711039 1.52110421]
 [5.66113509 2.74257988 4.02561317 1.31245966]
 [4.92409418 2.37777751 3.32994452 1.02331274]
 [5.64082336 2.65849448 4.15135316 1.36534708]
 [5.82366333 2.87400515 4.07922702 1.33431326]
 [5.78676837 2.81143749 4.13112904 1.3562598 ]
 [6.08719838 3.01597294 4.30540066 1.42822248]
 [5.01930486 2.58188023 3.11253983 0.93162567]
 [5.72230585 2.77730931 4.07463457 1.33279727]
 [6.75160563 2.83619912 5.92810597 2.10697576]
 [5.97729699 2.51793229 5.07006575 1.74979233]
 [7.01476791 3.08727012 5.94000602 2.11090192]
 [6.4378109  2.75946419 5.47811261 1.91927766]
 [6.66615908 2.82930793 5.77816114 2.04435401]
 [7.43265076 3.17240677 6.57264321 2.3748779 ]
 [5.24854769 2.14237105 4.4113161  1.47611586]
 [7.13654829 3.06903609 6.20862716 2.2232141 ]
 [6.52209728 2.68310088 5.78880221 2.04940919]
 [7.41138043 3.3823667  6.12098009 2.18528776]
 [6.60292068 3.09374185 5.13959729 1.77644541]
 [6.35347338 2.74745499 5.34030711 1.86174932]
 [6.76795329 3.03220816 5.57570256 1.95891678]
 [5.8197686  2.3763494  5.04589783 1.74028424]
 [6.0621445  2.52926185 5.21017704 1.80828691]
 [6.609877   2.98343615 5.36881699 1.87267832]
 [6.57546429 2.92316606 5.42096949 1.89471994]
 [7.87548563 3.62107816 6.54141677 2.3599616 ]
 [7.38398407 2.92440315 6.96500509 2.53984908]
 [5.81365971 2.39162504 5.00431256 1.7228453 ]
 [6.96903343 3.1282722  5.77227139 2.04064764]
 [5.87868336 2.51344609 4.89022871 1.67467088]
 [7.41063095 3.09787775 6.67641033 2.41854472]
 [6.21100849 2.79070682 4.98315238 1.71234151]
 [6.88559448 3.10960545 5.64921622 1.98931007]
 [7.15631211 3.24593237 5.90016951 2.0935963 ]
 [6.18014916 2.81974755 4.86728285 1.66380751]
 [6.23332782 2.86301666 4.88430204 1.67073827]
 [6.47307103 2.72470421 5.61359453 1.97602999]
 [7.01140288 3.19475538 5.72317615 2.01985753]
 [7.11667329 3.09115508 6.12731613 2.18914829]
 [7.91311684 3.78763102 6.2873805  2.25312542]
 [6.48106312 2.71616453 5.64559675 1.98943685]
 [6.27107021 2.83055425 5.02003445 1.7275857 ]
 [6.15883956 2.54176489 5.37065219 1.87528511]
 [7.40692849 3.29971345 6.27425181 2.24967262]
 [6.7101987  2.97836598 5.57062692 1.95702036]
 [6.56418043 2.93230699 5.38149422 1.87818817]
 [6.15758144 2.8380294  4.7883323  1.63074396]
 [6.84943578 3.15102299 5.49898398 1.92636697]
 [6.80022118 2.99583024 5.7086282  2.01460778]
 [6.80669354 3.19364302 5.33380563 1.85717403]
 [5.97729699 2.51793229 5.07006575 1.74979233]
 [6.95201347 3.04358556 5.90548444 2.09665999]
 [6.91756285 3.07544671 5.77722508 2.04293748]
 [6.66904015 3.02994114 5.39094874 1.88173174]
 [6.14880195 2.65421139 5.13134845 1.77482994]
 [6.53272206 2.96578609 5.25579114 1.825527  ]
 [6.60688475 2.98181821 5.3662607  1.87161698]
 [6.16013695 2.73344296 4.99793961 1.71875852]]

Process finished with exit code 0

你可能感兴趣的:(推荐系统实战)