pagerank的几个写法的测试

pagerank的几个写法的测试_第1张图片 作者:金良([email protected]) csdn博客:http://blog.csdn.net/u6591

In [1]:
import numpy as np


1.G是概率矩阵(各个列的和为1)且为正矩阵(所有元素大于0)¶


In [2]:
# Strictly positive 6x6 matrix in which every row sums to 1.
# It is transposed in a later cell so that the columns sum to 1 instead.
G = np.array([
    [0.025, 0.450, 0.450, 0.025, 0.025, 0.025],
    [0.450, 0.025, 0.450, 0.025, 0.025, 0.025],
    [0.025, 0.875, 0.025, 0.025, 0.025, 0.025],
    [0.025, 0.025, (1 - 0.025 * 3) / 3, 0.025, (1 - 0.025 * 3) / 3, (1 - 0.025 * 3) / 3],
    [1.0 / 6] * 6,
    [0.025, 0.025, 0.025, 0.450, 0.450, 0.025],
])


In [3]:
# Transpose so that the columns (rather than the rows) sum to 1, matching the
# column-vector update v = G.v used by the iteration that follows.
G = G.transpose()


In [4]:
# Uniform start vector: six components of 1/6 each.
v = np.full(6, 1.0 / 6)


In [5]:
# Power iteration: apply G to v fifteen times and print every iterate.
# print() is called as a function so the cell runs on Python 3; with a single
# argument it prints exactly the same text on Python 2.
for _ in range(15):
    v = np.dot(G, v)
    print(v)


[ 0.11944444  0.26111111  0.2375      0.11944444  0.16666667  0.09583333]
[ 0.15958333  0.30125     0.24418981  0.08934028  0.12318287  0.0824537 ]
[ 0.17048216  0.31783517  0.26361815  0.07749373  0.10280681  0.06776399]
[ 0.17464424  0.33609464  0.26905572  0.06836399  0.09032055  0.06152086]
[ 0.18063563  0.34071657  0.27422924  0.06394177  0.08331157  0.05716521]
[ 0.18160702  0.34666747  0.276494    0.06109769  0.07921452  0.05491931]
[ 0.18355573  0.34842494  0.27804972  0.05956276  0.07687377  0.05353307]
[ 0.18397105  0.3502439   0.27885835  0.05864201  0.07551812  0.05276657]
[ 0.18455206  0.3509157   0.27935499  0.05812419  0.07473943  0.05231364]
[ 0.18472726  0.35147445  0.2796304   0.05782138  0.0742899   0.05205661]
[ 0.18490105  0.35171933  0.27979285  0.05764846  0.07403118  0.05190713]
[ 0.18496847  0.35189462  0.27988514  0.05754828  0.07388201  0.05182148]
[ 0.18502183  0.35198059  0.27993878  0.05749075  0.07379609  0.05177196]
[ 0.1850462   0.35203668  0.27996952  0.05745753  0.07374658  0.05174349]
[ 0.18506302  0.35206616  0.27998729  0.05743842  0.07371805  0.05172707]


可以发现当G是概率矩阵且为正矩阵时,迭代结果的取值最终会趋于稳定,当然排名也趋于稳定


2.G是正矩阵(所有元素均大于0)¶


In [6]:
# Weight given to the link matrix S when G is built below; the remaining
# 1 - alpha is spread uniformly over all entries.
alpha = 0.85


In [7]:
# Link matrix before smoothing: every row sums to 1 except row 4 (0-based),
# which is all zeros.
S = np.array([
    [0.0, 0.5, 0.5, 0.0, 0.0, 0.0],
    [0.5, 0.0, 0.5, 0.0, 0.0, 0.0],
    [0.0, 1.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 1.0 / 3, 0.0, 1.0 / 3, 1.0 / 3],
    [0.0] * 6,
    [0.0, 0.0, 0.0, 0.5, 0.5, 0.0],
])


In [8]:
# Blend S with a uniform matrix so that every entry of G becomes positive.
# Row 4 of S is all zeros, so row 4 of G sums to only 1 - alpha = 0.15:
# G is positive but not a probability matrix.
G = alpha*S + np.ones([6,6])*(1-alpha)/6


G中每个元素都大于0


In [9]:
# Bare expression: the notebook echoes the value of G as the cell output.
G


Out[9]:
array([[ 0.025     ,  0.45      ,  0.45      ,  0.025     ,  0.025     ,
         0.025     ],
       [ 0.45      ,  0.025     ,  0.45      ,  0.025     ,  0.025     ,
         0.025     ],
       [ 0.025     ,  0.875     ,  0.025     ,  0.025     ,  0.025     ,
         0.025     ],
       [ 0.025     ,  0.025     ,  0.30833333,  0.025     ,  0.30833333,
         0.30833333],
       [ 0.025     ,  0.025     ,  0.025     ,  0.025     ,  0.025     ,
         0.025     ],
       [ 0.025     ,  0.025     ,  0.025     ,  0.45      ,  0.45      ,
         0.025     ]])


In [10]:
# Transpose for the column-vector update v = G.v.  After this, column 4 sums
# to 0.15 while the other columns sum to 1.
G = G.transpose()


In [11]:
# Uniform start vector: six components of 1/6 each.
v = np.full(6, 1.0 / 6)


In [12]:
# Power iteration without normalisation: apply G to v 25 times and print
# every iterate.
# print() is called as a function so the cell runs on Python 3; with a single
# argument it prints exactly the same text on Python 2.
for _ in range(25):
    v = np.dot(G, v)
    print(v)


[ 0.09583333  0.2375      0.21388889  0.09583333  0.14305556  0.07222222]
[ 0.12239583  0.24399306  0.19027778  0.05215278  0.07930556  0.04861111]
[ 0.12211545  0.23217274  0.1889103   0.03907813  0.05385475  0.03319502]
[ 0.11540658  0.22920598  0.17837778  0.03084104  0.04191318  0.0278053 ]
[ 0.11300129  0.21625765  0.17078738  0.027406    0.03614429  0.02432704]
[ 0.10660759  0.20789291  0.16239817  0.02503708  0.03280212  0.02246312]
[ 0.10228451  0.1972767   0.15468658  0.02347685  0.03057069  0.02102387]
[ 0.09707558  0.18818749  0.14719827  0.02216812  0.0288199   0.01988476]
[ 0.09256304  0.178959    0.14010113  0.02103437  0.02731534  0.01886432]
[ 0.08802851  0.17039618  0.13332754  0.01998827  0.02594801  0.01793067]
[ 0.08380885  0.162131    0.12688431  0.01901101  0.02467436  0.01705382]
[ 0.07974476  0.15430951  0.12074998  0.01808696  0.02347341  0.01622554]
[ 0.0758963   0.14684376  0.11491246  0.01721061  0.02233525  0.01543939]
[ 0.07222454  0.13974746  0.1093568   0.01637769  0.02125402  0.01469228]
[ 0.06873399  0.13299003  0.10406976  0.01558554  0.02022588  0.01398166]
[ 0.06541044  0.12656092  0.09903828  0.01483188  0.01924778  0.01330557]
[ 0.06224826  0.12044185  0.09425006  0.01411474  0.01831711  0.01266224]
[ 0.05923864  0.11461892  0.08969333  0.01343231  0.01743148  0.01205003]
[ 0.05637466  0.10907737  0.0853569   0.01278288  0.0165887   0.01146744]
[ 0.05364908  0.1038038   0.08123013  0.01216486  0.01578668  0.01091302]
[ 0.0510553   0.09878516  0.07730287  0.01157672  0.01502343  0.0103854 ]
[ 0.04858691  0.09400917  0.07356549  0.01101702  0.01429709  0.00988329]
[ 0.04623787  0.08946408  0.0700088   0.01048437  0.01360586  0.00940546]
[ 0.04400239  0.08513873  0.06662406  0.00997748  0.01294806  0.00895073]
[ 0.041875    0.08102251  0.06340297  0.0094951   0.01232205  0.00851799]


可以发现当G为正矩阵但不是概率矩阵时(这里G有一列之和为0.15,小于1),迭代结果的取值不会稳定在一个非零向量上,而是逐次衰减、趋于0,但是排名会收敛。


3.在2的基础上对每次迭代的结果归一化,则迭代结果的取值也收敛了¶


In [13]:
# Reset v to the uniform start vector (six components of 1/6 each).
v = np.full(6, 1.0 / 6)


每次迭代,都对v值进行归一化。


In [14]:
# Power iteration with normalisation: after each multiplication by G, rescale
# v so that its components sum to 1, then print the iterate.
# print() is called as a function so the cell runs on Python 3; with a single
# argument it prints exactly the same text on Python 2.
for _ in range(25):
    v = np.dot(G, v)
    # ndarray.sum() replaces the builtin sum(), which iterates element by
    # element in Python.
    v = v / v.sum()
    print(v)


[ 0.11165049  0.27669903  0.24919094  0.11165049  0.16666667  0.08414239]
[ 0.16613253  0.33118107  0.25827128  0.07078895  0.10764445  0.06598171]
[ 0.1824453   0.34687523  0.28223943  0.05838426  0.08046111  0.04959467]
[ 0.18507995  0.36758245  0.28606819  0.04946043  0.06721705  0.04459194]
[ 0.19220402  0.36783288  0.29049245  0.04661489  0.06147787  0.04137789]
[ 0.19132699  0.37310218  0.29145348  0.04493367  0.05886945  0.04031422]
[ 0.19323786  0.37269893  0.29223686  0.04435292  0.05775474  0.03971869]
[ 0.19286509  0.37388185  0.29244644  0.04404256  0.05725799  0.03950607]
[ 0.19330795  0.37373663  0.29258613  0.04392803  0.05704515  0.03939611]
[ 0.19320633  0.37398817  0.29262934  0.04387056  0.05695109  0.03935451]
[ 0.19330244  0.37394996  0.2926546   0.04384829  0.05691061  0.03933409]
[ 0.19327839  0.37400193  0.29266325  0.04383759  0.05689281  0.03932604]
[ 0.19329852  0.37399296  0.29266787  0.0438333   0.05688512  0.03932223]
[ 0.19329319  0.37400351  0.29266957  0.04383129  0.05688175  0.03932068]
[ 0.19329732  0.37400153  0.29267043  0.04383047  0.05688029  0.03931997]
[ 0.19329618  0.37400365  0.29267076  0.04383009  0.05687965  0.03931967]
[ 0.19329702  0.37400322  0.29267092  0.04382994  0.05687937  0.03931954]
[ 0.19329678  0.37400364  0.29267098  0.04382987  0.05687925  0.03931948]
[ 0.19329695  0.37400355  0.29267101  0.04382984  0.0568792   0.03931945]
[ 0.1932969   0.37400364  0.29267102  0.04382982  0.05687918  0.03931944]
[ 0.19329693  0.37400362  0.29267103  0.04382982  0.05687917  0.03931944]
[ 0.19329692  0.37400363  0.29267103  0.04382981  0.05687916  0.03931944]
[ 0.19329693  0.37400363  0.29267103  0.04382981  0.05687916  0.03931944]
[ 0.19329692  0.37400363  0.29267103  0.04382981  0.05687916  0.03931944]
[ 0.19329693  0.37400363  0.29267103  0.04382981  0.05687916  0.03931943]


上面3个方法迭代的结果的取值虽然是不同的,但是根据取值大小对节点进行排名的名次是相同的。方法1要求G是概率矩阵且为正矩阵,对G的要求比较严格,所以预处理比较复杂;而方法2和3只要求矩阵是正矩阵即可,且方法3由于对每次迭代的结果进行归一化,所以迭代结果的取值是收敛的。¶


4.不改变最初的概率矩阵的方法¶


In [15]:
# Probability matrix used as-is: every row sums to 1 and zero entries are
# kept (no smoothing term is added).  It is transposed immediately so that
# the columns sum to 1 for the column-vector update v = G.v.
G = np.array([
    [0.0, 0.5, 0.5, 0.0, 0.0, 0.0],
    [0.5, 0.0, 0.5, 0.0, 0.0, 0.0],
    [0.0, 1.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 1.0 / 3, 0.0, 1.0 / 3, 1.0 / 3],
    [1.0 / 6] * 6,
    [0.0, 0.0, 0.0, 0.5, 0.5, 0.0],
]).T

# Uniform start vector: six components of 1/6 each.
v = np.full(6, 1.0 / 6)


每次迭代都给v每个元素一个大小相同的微小增量(0.05/6)。由于G的各列之和为1,v的各分量之和每次迭代都会增加0.05,所以取值本身并不收敛;但根据pagerank值的打印结果可以看到排名是收敛的。


In [16]:
# Power iteration with a constant boost: add 0.05/6 to every component of v,
# multiply by G, and print the iterate; repeated 25 times.
# print() is called as a function so the cell runs on Python 3; with a single
# argument it prints exactly the same text on Python 2.
for _ in range(25):
    v = v + 0.05 / 6
    v = np.dot(G, v)
    print(v)


[ 0.11666667  0.29166667  0.2625      0.11666667  0.175       0.0875    ]
[ 0.18055556  0.36388889  0.28472222  0.07847222  0.12013889  0.07222222]
[ 0.20752315  0.40891204  0.33090278  0.06168981  0.090625    0.05034722]
[ 0.22511574  0.46365741  0.35638503  0.04583333  0.06917438  0.0398341 ]
[ 0.24891332  0.49436085  0.38369342  0.03700167  0.05505723  0.03097351]
[ 0.26191219  0.53121517  0.40564718  0.03021851  0.04533018  0.02567676]
[ 0.27871817  0.5580472   0.42669155  0.02594897  0.03879958  0.02179454]
[ 0.29104575  0.58640612  0.44599894  0.02291942  0.03434685  0.01928292]
[ 0.30448309  0.61113518  0.46459022  0.02092149  0.03133908  0.01753095]
[ 0.31634632  0.63594383  0.48250614  0.01954421  0.02929582  0.01636368]
[ 0.32841011  0.65945083  0.50004245  0.01862003  0.02791254  0.01556404]
[ 0.33993306  0.68278848  0.51728924  0.01798967  0.02697412  0.01502543]
[ 0.35144548  0.70564034  0.53435301  0.01756396  0.02633829  0.01465891]
[ 0.36276544  0.72835436  0.55128728  0.01727473  0.02590716  0.01441104]
[ 0.37405059  0.75087675  0.568136    0.01707893  0.02561495  0.01424277]
[ 0.38526309  0.77331935  0.58492581  0.0169461   0.02541685  0.0141288 ]
[ 0.39645137  0.79568238  0.60167606  0.0168561   0.02528258  0.01405151]
[ 0.40761051  0.8180044   0.61839934  0.01679507  0.02519155  0.01399913]
[ 0.41875635  0.84029208  0.6351044   0.01675371  0.02512985  0.01396362]
[ 0.4298899   0.86255977  0.65179709  0.01672567  0.02508802  0.01393955]
[ 0.44101678  0.88481227  0.6684814   0.01670666  0.02505967  0.01392323]
[ 0.4521383   0.90705529  0.68516002  0.01669378  0.02504045  0.01391217]
[ 0.46325661  0.92929147  0.70183479  0.01668505  0.02502742  0.01390467]
[ 0.47437253  0.95152322  0.71850696  0.01667913  0.02501859  0.01389958]
[ 0.48548693  0.97375187  0.73517735  0.01667511  0.0250126   0.01389614]






你可能感兴趣的:(pagerank)