PageRank算法(python实现)

Python 实现的PageRank算法,纯粹使用python原生模块,没有使用numpy、scipy。这个程序实现还比较原始,可优化的地方较多。

#-*- coding:utf-8 -*-
import random

N = 8    		#八个网页
d = 0.85 		#阻尼因子为0.85
delt = 0.00001	#迭代控制变量
#两个矩阵相乘
def matrix_multi(A,B):
	result = [[0]*len(B[0]) for i in range(len(A))]
	for i in range(len(A)):
		for j in range(len(B[0])):
			for k in range(len(B)):
				result[i][j] += A[i][k]*B[k][j] 
	return result

#矩阵A的每个元素都乘以n
def matrix_multiN(n,A):
	result = [[1]*len(A[0]) for i in range(len(A))]
	for i in range(len(A)):
		for j in range(len(A[0])):
			result[i][j] = n*A[i][j]
	return result

#两个矩阵相加
def matrix_add(A,B):
	if len(A[0])!=len(B[0]) and len(A)!=len(B):
		return
	result = [[0]*len(A[0]) for i in range(len(A))] 
	for i in range(len(A)):
		for j in range(len(A[0])):
			result[i][j] = A[i][j]+B[i][j]
	return result 

def pageRank(A):
	e = []
	for i in range(N):
		e.append(1) 
	norm = 100
	New_P = []
	for i in range(N):
		New_P.append([random.random()]) 
	r = [ [(1-d)*i*1/N] for i in e]
	while norm > delt:
		P = New_P
		New_P = matrix_add(r,matrix_multiN(d,matrix_multi(A,P))) #P=(1-d)*e/n+d*M'P PageRank算法的核心
		norm = 0
		#求解矩阵一阶范数
		for i in range(N):
			norm += abs(New_P[i][0]-P[i][0])
	print New_P

#根据邻接矩阵求转移概率矩阵并转向
def tran_and_convert(A):
	result = [[0]*len(A[0]) for i in range(len(A))]
	result_convert = [[0]*len(A[0]) for i in range(len(A))]
	for i in range(len(A)):
		for j in range(len(A[0])):
			result[i][j] = A[i][j]*1.0/sum(A[i])
	for i in range(len(result)):
		for j in range(len(result[0])):
			result_convert[i][j]=result[j][i]
	return result_convert


def main():
	A = [[0,1,1,0,0,1,0,0],\
	[0,0,0,1,1,0,0,0],\
	[0,0,0,1,0,1,0,0],\
	[0,0,0,0,0,1,0,0],\
	[1,0,0,1,0,0,1,1],\
	[0,0,0,1,0,0,0,0],\
	[0,0,1,0,0,0,0,0],\
	[0,0,0,1,0,0,1,0]]
	M = tran_and_convert(A)
	pageRank(M)

if __name__ == '__main__':
	main()


你可能感兴趣的:(python)