I've been tinkering with this python snippet which is supposed to demonstrate four different approaches of calculating PageRank.
the code:
from numpy import *
def powerMethodBase(A,x0,iter):
""" basic power method """
for i in range(iter):
x0 = dot(A,x0)
x0 = x0/linalg.norm(x0,1)
return x0
def powerMethod(A,x0,m,iter):
""" power method modified to compute
the maximal real eigenvector
of the matrix M built on top of the input matrix A """
n = A.shape[1]
delta = m*(array([1]*n,dtype='float64')/n) # array([1]*n is [1 1 ... 1] n times
for i in range(iter):
x0 = dot((1-m),dot(A,x0)) + delta
return x0
def maximalEigenvector(A):
""" using the eig function to compute eigenvectors """
n = A.shape[1]
w,v = linalg.eig(A)
return abs(real(v[:n,0])/linalg.norm(v[:n,0],1))
def linearEquations(A,m):
""" solving linear equations
of the system (I-(1-m)*A)*x = m*s """
n = A.shape[1]
C = eye(n,n)-dot((1-m),A)
b = m*(array([1]*n,dtype='float64')/n)
return linalg.solve(C,b)
def getTeleMatrix(A,m):
""" return the matrix M
of the web described by A """
n = A.shape[1]
S = ones((n,n))/n
return (1-m)*A+m*S
A = array([ [0, 0, 0, 1, 0, 1],
[1/2.0, 0, 0, 0, 0, 0],
[0, 1/2.0, 0, 0, 0, 0],
[0, 1/2.0, 1/3.0, 0, 0, 0],
[0, 0, 1/3.0, 0, 0, 0],
[1/2.0, 0, 1/3.0, 0, 1, 0 ] ])
n = A.shape[1] # A is n x n
m = 0.15
M = getTeleMatrix(A,m)
x0 = [1]*n
x1 = powerMethod(A,x0,m,130)
x2 = powerMethodBase(M,x0,130)
x3 = maximalEigenvector(M)
x4 = linearEquations(A,m)
# comparison of the four methods
labels = range(1,6)
print array([labels, x1, x2, x3, x4]).T
The expected output:
[[ 1. 0.32954577 0.32954577 0.32954577 0.32954577]
[ 2. 0.16505695 0.16505695 0.16505695 0.16505695]
[ 3. 0.0951492 0.0951492 0.0951492 0.0951492 ]
[ 4. 0.12210815 0.12210815 0.12210815 0.12210815]
[ 5. 0.05195894 0.05195894 0.05195894 0.05195894]
[ 6. 0.23618099 0.23618099 0.23618099 0.23618099]]
After modifying line 59 to labels = range(1,7)
, I ran the script locally and got the expected output above. Note how each line in the output repeats the same page rank four times to demonstrate that all four methods of calculating pagerank work and return the same results.
However, when I swap out the A array for a different one:
A = array([ [0, 0, 1/2.0, 0],
[1, 0, 1/2.0, 0],
[0, 0, 0, 0],
[0, 1, 0, 0] ])
It gives me this output:
[[ 1. 0.0534375 0.11790211 0.11790211 0.0534375 ]
[ 2. 0.09885937 0.29698454 0.29698454 0.09885937]
[ 3. 0.0375 0.06701063 0.06701063 0.0375 ]
[ 4. 0.12153047 0.51810272 0.51810272 0.12153047]]
note how the first and fourth methods return the same result and the second and third return the same results, but all four are returning different answers. What am I doing wrong here? Are the four methods supposed to return different results at times?
I tried modifying the getTeleMatrix()
function but it didn't solve the problem.
Any help is greatly appreciated.