-
Notifications
You must be signed in to change notification settings - Fork 19
/
Copy pathTrustRank.py
170 lines (126 loc) · 4.55 KB
/
TrustRank.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
import math
import heapq
import numpy as np
from graphs import plotGraph
from scipy.sparse import csr_matrix as SparseMatrix
class TrustRank:
"""Similar to PageRank but the teleport set is a set of trusted pages.
Contains functions to calculate TrustRank of each node in the web-graph.
Initially trust(rank) is divided into some specific(trusted) pages which
is then propagated to other pages connected to them. Trust decreases as
it goes away from a trusted page.
...
Parameters
----------
beta : float
Probability with which teleports will occur.
edges : collections.defaltdict(list)
Adjacency list containing information connections in web-graph.
epsilon : float
A small value and total error in ranks should be less than epsilon.
max_iterations : int
Maximum number of times to apply power iteration.
node_num : int
Number of nodes in the web-graph.
PageRank_vector : numpy.ndarray [1-dimensional, dtype=float]
Contains PageRank of each node in the web-graph.
order : {'beta', 'edges', 'epsilon', 'max_iterations', 'node_num',
'PageRank_vector'}
Parameters follows precisely the above order.
None of the parameter is optional.
Methods
-------
get_trustedPages(node_number_threshold=100)
Calculates and returns the set of trusted pages.
get_topicSpecificRank(teleport_set)
Calculates TrustRank of each node taking trusted set as `teleport_set`.
trustRank()
Utility function which call other functions and returns rank vector.
"""
def __init__(self, beta, edges, epsilon, max_iterations, node_num,
PageRank_vector):
self.beta = beta
self.edges = edges
self.epsilon = epsilon
self.node_num = node_num
self.PageRank_vector = PageRank_vector
self.MAX_ITERATIONS = max_iterations
def get_trustedPages(self, node_number_threshold=100):
"""Calculates and returns the set of trusted pages.
Parameters
----------
node_number_threshold : int, optional
defines the size of trusted based on whether node_num is greated.
or less than threshold
Default value : 100
Returns
-------
trusted_pages : list of int
List of trusted pages identified by their node number.
"""
# set number of trusted pages
if self.node_num < node_number_threshold:
ratio = 0.2
else:
ratio = 0.0002
trusted_set_size = int(math.ceil(self.node_num * ratio))
# set and return trusted pages
heaped_ranks = [(rank, node) for (node, rank) in
enumerate(self.PageRank_vector)]
heapq._heapify_max(heaped_ranks)
trusted_pages = [heapq._heappop_max(heaped_ranks)[1]
for _ in range(trusted_set_size)]
return trusted_pages
def get_topicSpecificRank(self, teleport_set):
"""Calculates TrustRank of each node taking trusted set as
`teleport_set`.
Parameters
----------
teleport_set : list of int
List of pages to which a random walker in the web-graph can
teleport to.
In TrustRank this set corresponds to trusted pages.
Returns
-------
final_rank_vector : numpy.ndarray [1-dimensional, dtype=float]
Contains TrustRank of each node in the web-graph.
"""
diff = math.inf
iterations = 0
teleport_set_size = len(teleport_set)
pg = plotGraph(self.edges, interval=3000)
final_rank_vector = np.zeros(self.node_num)
initial_rank_vector = np.fromiter(
[1/teleport_set_size if node in teleport_set else 0 for node in
range(self.node_num)], dtype='float')
while(iterations < self.MAX_ITERATIONS and diff > self.epsilon):
new_rank_vector = np.zeros(self.node_num)
for parent in self.edges:
for child in self.edges[parent]:
new_rank_vector[child] += (initial_rank_vector[parent] /
len(self.edges[parent]))
leaked_rank = (1 - sum(new_rank_vector)) / teleport_set_size
leaked_rank_vector = np.array([leaked_rank if node in teleport_set
else 0 for node in range(self.node_num)])
final_rank_vector = new_rank_vector + leaked_rank_vector
diff = sum(abs(final_rank_vector - initial_rank_vector))
initial_rank_vector = final_rank_vector
iterations += 1
print("TrustRank iteration: " + str(iterations))
print(final_rank_vector)
pg.plot(9, final_rank_vector)
return final_rank_vector
def trustRank(self):
"""Utility function which calls other functions in a specific order.
Parameters
----------
None
Returns
-------
final_rank_vector : numpy.ndarray [1-dimensional, dtype=float]
Contains TrustRank of each node in the web-graph.
"""
trusted_pages = self.get_trustedPages()
print("got seed set...")
final_rank_vector = self.get_topicSpecificRank(trusted_pages)
return final_rank_vector