-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPageRanker.java
147 lines (130 loc) · 5.54 KB
/
PageRanker.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
package ranker;
import database_manager.DatabaseManager;
import org.ejml.simple.SimpleMatrix;
import java.sql.*;
import java.util.HashMap;
import java.util.HashSet;
// Assuming page ids are continuous
public class PageRanker {
private final DatabaseManager dbManager;
private Connection dbConnection;
private int startingID;
static int pagesCount;
final static double dampingFactor = 0.85;
final static double iterativeTolerance = 0.001;
final static boolean isAlgebraic = false;
private static class InstanceHolder {
private static final PageRanker instance = new PageRanker();
}
private PageRanker() {
dbManager = new DatabaseManager();
dbConnection = dbManager.getDBConnection();
}
public static PageRanker getInstance() {
return InstanceHolder.instance;
}
public static void main(String[] args) {
PageRanker pageRanker = new PageRanker();
pageRanker.timedUpdatePageRanks();
}
public void timedUpdatePageRanks() {
long start = System.nanoTime();
updatePageRanks();
long end = System.nanoTime();
System.out.println(
"PageRank calculation done for " + PageRanker.pagesCount +
" pages in " + ((end - start) / 1000000) + " ms, " +
"using " + (PageRanker.isAlgebraic ? "algebraic" : "iterative") + " method."
);
}
public void updatePageRanks() {
try {
updatePagesMetadata();
HashMap<Integer, HashSet<Integer>> pageConnections = getPageConnections();
SimpleMatrix adjacencyMatrix = buildAdjacencyMatrix(pageConnections);
SimpleMatrix pageRanks;
if (isAlgebraic)
pageRanks = calcPageRanksAlgebraic(adjacencyMatrix);
else
pageRanks = calcPageRanksIterative(adjacencyMatrix);
savePageRanks(pageRanks);
} catch (Exception ex) {
System.out.println("Failed to calculate PageRank: Exception occurred.");
ex.printStackTrace();
}
}
private SimpleMatrix calcPageRanksAlgebraic(SimpleMatrix adjacencyMatrix) {
SimpleMatrix dampingMatrix = new SimpleMatrix(pagesCount, 1);
dampingMatrix.fill(1.0 - dampingFactor / pagesCount);
return (
SimpleMatrix.identity(pagesCount).minus(adjacencyMatrix.scale(dampingFactor))
).invert().mult(dampingMatrix);
}
private SimpleMatrix calcPageRanksIterative(SimpleMatrix adjacencyMatrix) {
SimpleMatrix dampingMatrix = new SimpleMatrix(pagesCount, 1);
dampingMatrix.fill(1.0 - dampingFactor / pagesCount);
SimpleMatrix pageRanks = new SimpleMatrix(pagesCount, 1);
pageRanks.fill(1.0 / pagesCount);
adjacencyMatrix = adjacencyMatrix.scale(dampingFactor);
boolean converged = false;
SimpleMatrix newPageRanks;
while(!converged) {
newPageRanks = adjacencyMatrix.mult(pageRanks).plus(dampingMatrix);
converged = newPageRanks.minus(pageRanks).elementMaxAbs() < iterativeTolerance;
pageRanks = newPageRanks;
}
return pageRanks;
}
private void savePageRanks(SimpleMatrix pageRanks) throws SQLException {
String query = "UPDATE page SET page_rank = CASE id"
+ " WHEN ? THEN ?".repeat(pagesCount)
+ " END";
PreparedStatement statement = dbConnection.prepareStatement(query);
for(int i = 0; i < pagesCount; i++) {
statement.setInt(2*i+1, i + startingID);
statement.setDouble(2*i+2, pageRanks.get(i, 0));
}
statement.execute();
statement.close();
}
private HashMap<Integer, HashSet<Integer>> getPageConnections() throws SQLException {
String query = "SELECT * FROM page_connections";
Statement statement = dbConnection.createStatement();
ResultSet result = statement.executeQuery(query);
HashMap<Integer, HashSet<Integer>> outboundConnections = new HashMap<>(pagesCount);
for (int pageID = startingID; pageID < pagesCount + startingID; pageID++) {
outboundConnections.put(pageID, new HashSet<>());
}
while (result.next()) {
int pageID = result.getInt(1), outboundPageID = result.getInt(2);
outboundConnections.get(pageID).add(outboundPageID);
}
result.close();
statement.close();
return outboundConnections;
}
private SimpleMatrix buildAdjacencyMatrix(HashMap<Integer, HashSet<Integer>> pageConnections) {
SimpleMatrix adjacencyMatrix = new SimpleMatrix(pagesCount, pagesCount);
pageConnections.forEach((pageID, outboundConnections) -> {
int outboundConnectionsCount = outboundConnections.size();
outboundConnections.forEach(outboundPageID -> {
if(!pageID.equals(outboundPageID))
adjacencyMatrix.set(outboundPageID - startingID,
pageID - startingID,
1.0/outboundConnectionsCount);
}
);
});
return adjacencyMatrix;
}
private void updatePagesMetadata() throws SQLException {
String query = "SELECT COUNT(*), MIN(id) FROM page";
Statement statement = dbConnection.createStatement();
ResultSet result = statement.executeQuery(query);
result.next();
this.pagesCount = result.getInt(1);
this.startingID = result.getInt(2);
result.close();
statement.close();
}
}