一、PageRank的基本思想
起源于网页排名,如果有很多网页(T)都有指向一个网页(A)的链接,说明网页(A)比较重要,如果没有网页指向网页(A),那么A就是孤零零的存在
二、PageRank算法的原理-基本概念
1、出链:网页A可以跳转到网页B, 说明网页A出链网页B
2、入链:网页A可以跳转到网页B, 说明网页B有一条来自网页A的入链
3、无出链: 该网页上没有可以跳转到其他网页的链接
4、只对自己出链:该网页上只有指向该网页自身的链接
5、PR值:网页被访问的概率(Pagerank值)
三、PageRank算法的原理-出入链关系
四、pagerank算法公式的说明
四、马尔科夫实例验证
# -*- coding:utf-8 -*-
'''
@Author : wangtao
@Time : 19/10/18 上午9:04
@desc :
'''
import numpy as np
def markov(iterations=100):
    """Iterate a damped vector-matrix update and print every intermediate vector.

    Starting from a fixed 4-element vector, each step computes
    ``0.85 * (vector . transfer_matrix) + 0.15 * vector`` and prints the
    step index followed by the new vector.

    Args:
        iterations: number of update steps to run (defaults to 100).

    Returns:
        The vector after the last step (the initial vector when
        ``iterations`` is 0).
    """
    init_array = np.array([0.3, 0.2, 0.4, 0.1])
    transfer_matrix = np.array([[0, 0.5, 0, 0],
                                [0.333, 0, 0, 0.5],
                                [0.333, 0, 1, 0.5],
                                [0.333, 0.5, 0, 0]])
    # NOTE(review): every *column* of transfer_matrix sums to ~1, yet the
    # vector is multiplied from the left (vector . matrix), so the total of
    # the vector is not preserved between steps (it starts at 1.0 and ends
    # near 1.6). Confirm whether np.dot(transfer_matrix, restmp) was intended.
    restmp = init_array
    for i in range(iterations):
        res = 0.85 * np.dot(restmp, transfer_matrix) + 0.15 * restmp
        print(i, "=====", res)
        restmp = res
    return restmp


markov()
0 ===== [0.243135 0.2 0.4 0.27 ]
1 ===== [0.28272375 0.24808238 0.4 0.2955 ]
2 ===== [0.30948955 0.28295745 0.4 0.31976001]
3 ===== [0.33024261 0.30987468 0.4 0.33822092]
4 ===== [0.34619985 0.3305782 0.4 0.35242988]
5 ===== [0.35847541 0.34650436 0.4 0.36336022]
6 ===== [0.36791848 0.3587558 0.4 0.37176839]
7 ===== [0.37518264 0.36818029 0.4 0.37823647]
8 ===== [0.38077066 0.37543017 0.4 0.38321209]
9 ===== [0.38506929 0.3810072 0.4 0.38703964]
10 ===== [0.38837605 0.38529737 0.4 0.389984 ]
11 ===== [0.3909198 0.38859763 0.4 0.39224898]
12 ===== [0.3928766 0.39113638 0.4 0.39399134]
13 ===== [0.39438189 0.39308933 0.4 0.39533166]
14 ===== [0.39553985 0.39459166 0.4 0.39636272]
15 ===== [0.39643061 0.39574734 0.4 0.39715586]
16 ===== [0.39711584 0.39663635 0.4 0.397766 ]
17 ===== [0.39764296 0.39732024 0.4 0.39823535]
18 ===== [0.39804845 0.39784632 0.4 0.3985964 ]
19 ===== [0.39836038 0.39825101 0.4 0.39887415]
20 ===== [0.39860033 0.39856232 0.4 0.3990878 ]
21 ===== [0.39878492 0.39880181 0.4 0.39925216]
22 ===== [0.39892691 0.39898603 0.4 0.39937859]
23 ===== [0.39903614 0.39912774 0.4 0.39947585]
24 ===== [0.39912017 0.39923676 0.4 0.39955067]
25 ===== [0.39918481 0.39932062 0.4 0.39960822]
26 ===== [0.39923453 0.39938513 0.4 0.3996525 ]
27 ===== [0.39927278 0.39943476 0.4 0.39968655]
28 ===== [0.3993022 0.39947293 0.4 0.39971275]
29 ===== [0.39932484 0.3995023 0.4 0.39973291]
30 ===== [0.39934225 0.39952489 0.4 0.39974841]
31 ===== [0.39935565 0.39954226 0.4 0.39976034]
32 ===== [0.39936595 0.39955563 0.4 0.39976951]
33 ===== [0.39937388 0.39956592 0.4 0.39977657]
34 ===== [0.39937997 0.39957383 0.4 0.399782 ]
35 ===== [0.39938466 0.39957991 0.4 0.39978618]
36 ===== [0.39938827 0.39958459 0.4 0.39978939]
37 ===== [0.39939105 0.39958819 0.4 0.39979186]
38 ===== [0.39939318 0.39959096 0.4 0.39979376]
39 ===== [0.39939482 0.3995931 0.4 0.39979522]
40 ===== [0.39939609 0.39959473 0.4 0.39979635]
41 ===== [0.39939706 0.399596 0.4 0.39979721]
42 ===== [0.39939781 0.39959697 0.4 0.39979788]
43 ===== [0.39939838 0.39959771 0.4 0.39979839]
44 ===== [0.39939882 0.39959829 0.4 0.39979879]
45 ===== [0.39939917 0.39959873 0.4 0.39979909]
46 ===== [0.39939943 0.39959907 0.4 0.39979932]
47 ===== [0.39939963 0.39959933 0.4 0.3997995 ]
48 ===== [0.39939978 0.39959953 0.4 0.39979964]
49 ===== [0.3993999 0.39959968 0.4 0.39979975]
50 ===== [0.39939999 0.3995998 0.4 0.39979983]
51 ===== [0.39940006 0.39959989 0.4 0.39979989]
52 ===== [0.39940012 0.39959997 0.4 0.39979994]
53 ===== [0.39940016 0.39960002 0.4 0.39979998]
54 ===== [0.39940019 0.39960006 0.4 0.3998 ]
55 ===== [0.39940022 0.39960009 0.4 0.39980003]
56 ===== [0.39940024 0.39960012 0.4 0.39980004]
57 ===== [0.39940025 0.39960014 0.4 0.39980006]
58 ===== [0.39940026 0.39960015 0.4 0.39980007]
59 ===== [0.39940027 0.39960016 0.4 0.39980007]
60 ===== [0.39940028 0.39960017 0.4 0.39980008]
61 ===== [0.39940028 0.39960018 0.4 0.39980008]
62 ===== [0.39940029 0.39960018 0.4 0.39980009]
63 ===== [0.39940029 0.39960019 0.4 0.39980009]
64 ===== [0.39940029 0.39960019 0.4 0.39980009]
65 ===== [0.39940029 0.39960019 0.4 0.39980009]
66 ===== [0.3994003 0.39960019 0.4 0.3998001 ]
67 ===== [0.3994003 0.3996002 0.4 0.3998001]
68 ===== [0.3994003 0.3996002 0.4 0.3998001]
69 ===== [0.3994003 0.3996002 0.4 0.3998001]
70 ===== [0.3994003 0.3996002 0.4 0.3998001]
71 ===== [0.3994003 0.3996002 0.4 0.3998001]
72 ===== [0.3994003 0.3996002 0.4 0.3998001]
73 ===== [0.3994003 0.3996002 0.4 0.3998001]
74 ===== [0.3994003 0.3996002 0.4 0.3998001]
75 ===== [0.3994003 0.3996002 0.4 0.3998001]
76 ===== [0.3994003 0.3996002 0.4 0.3998001]
77 ===== [0.3994003 0.3996002 0.4 0.3998001]
78 ===== [0.3994003 0.3996002 0.4 0.3998001]
79 ===== [0.3994003 0.3996002 0.4 0.3998001]
80 ===== [0.3994003 0.3996002 0.4 0.3998001]
81 ===== [0.3994003 0.3996002 0.4 0.3998001]
82 ===== [0.3994003 0.3996002 0.4 0.3998001]
83 ===== [0.3994003 0.3996002 0.4 0.3998001]
84 ===== [0.3994003 0.3996002 0.4 0.3998001]
85 ===== [0.3994003 0.3996002 0.4 0.3998001]
86 ===== [0.3994003 0.3996002 0.4 0.3998001]
87 ===== [0.3994003 0.3996002 0.4 0.3998001]
88 ===== [0.3994003 0.3996002 0.4 0.3998001]
89 ===== [0.3994003 0.3996002 0.4 0.3998001]
90 ===== [0.3994003 0.3996002 0.4 0.3998001]
91 ===== [0.3994003 0.3996002 0.4 0.3998001]
92 ===== [0.3994003 0.3996002 0.4 0.3998001]
93 ===== [0.3994003 0.3996002 0.4 0.3998001]
94 ===== [0.3994003 0.3996002 0.4 0.3998001]
95 ===== [0.3994003 0.3996002 0.4 0.3998001]
96 ===== [0.3994003 0.3996002 0.4 0.3998001]
97 ===== [0.3994003 0.3996002 0.4 0.3998001]
98 ===== [0.3994003 0.3996002 0.4 0.3998001]
99 ===== [0.3994003 0.3996002 0.4 0.3998001]
Process finished with exit code 0
五、GraphFrames(基于Spark GraphX)实现PageRank算法
import org.apache.spark.SparkConf
import org.apache.spark.api.java.JavaSparkContext
import org.neo4j.spark.Neo4j
/** Loads a link graph from Neo4j into a GraphFrame, runs PageRank on it and
  * prints the vertices, first as returned and then sorted by rank.
  *
  * The Bolt URL, user and password below are placeholders and must be
  * replaced with real connection settings before running.
  */
object PageRank {

  /** Number of PageRank iterations to run. */
  private val MaxIterations = 100

  /** Entry point: builds a local Spark context, loads the graph, ranks it and prints the result.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("neo4j")
      .setMaster("local[*]")
      .set("spark.neo4j.bolt.url", "bolt://ip")
      .set("spark.neo4j.bolt.user", "user_name")
      .set("spark.neo4j.bolt.password", "passwd")
    val sc = new JavaSparkContext(conf)
    try {
      sc.setLogLevel("OFF")
      val neo4j = Neo4j(sc)
      // NOTE(review): the source label is "page" while the target label is
      // "Page"; if both ends are meant to be the same node label, one of
      // them is probably a typo — confirm against the database schema.
      val graphFrame = neo4j
        .pattern(("page", "value"), ("TO", null), ("Page", "value"))
        .partitions(1)
        .rows(100)
        .loadGraphFrame
      val pageRankFrame = graphFrame.pageRank.maxIter(MaxIterations).run()
      val ranked = pageRankFrame.vertices
      // Collect to the driver before printing so rows appear on this
      // console in a deterministic order instead of per-partition.
      ranked.collect().foreach(println)
      val pageRankSort = ranked.orderBy(ranked.col("pagerank").desc)
      pageRankSort.collect().foreach(println)
    } finally {
      // Always release the Spark context, even if loading or ranking fails.
      sc.stop()
    }
  }
}