-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathpage_rank.py
46 lines (35 loc) · 1.25 KB
/
page_rank.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import scipy.sparse as spsp
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
import csv
# PageRank by power iteration over the sparse matrix stored in
# polished_data/people_adjacency_matrix.npz.
#
# Each step computes  x <- damping * A @ x + q  and rescales x to unit L1 norm;
# iteration stops once two successive iterates differ by at most `tol` in L1
# norm.  The scores are then paired with the `Name` column of
# polished_data/people_nodes.csv and written, highest first, to
# polished_data/people_page_rank.csv.
damping = 0.85
tol = 1e-10

adjacency_matrix = spsp.load_npz(
    os.path.join("polished_data", "people_adjacency_matrix.npz")
)
# NOTE(review): the matrix is used as-is as the transition operator, so it is
# presumably already normalised per source node -- confirm against the script
# that produces the .npz file.
M = damping * adjacency_matrix

# Constant term added at every step: (1 - damping) spread evenly over all nodes.
q = (1 - damping) * np.ones((adjacency_matrix.shape[0],)) / adjacency_matrix.shape[0]

page_rank_vector = q.copy()
while True:
    # No copy needed: the next statement rebinds `page_rank_vector` to a new
    # array, so the previous iterate is never mutated in place.
    page_rank_old = page_rank_vector
    page_rank_vector = M @ page_rank_vector + q
    page_rank_vector /= np.linalg.norm(page_rank_vector, ord=1)

    # L1 distance between successive iterates, computed once per step and
    # used for both the progress line and the stopping test.
    residual = np.linalg.norm(page_rank_vector - page_rank_old, ord=1)
    # Same text as the f-string "=" debug form of the norm expression
    # (expression source followed by the repr of its value).
    print(
        "np.linalg.norm(page_rank_vector - page_rank_old, ord=1)="
        f"{residual!r}"
    )
    # Written as "not >" so that a NaN residual also ends the loop, exactly as
    # a `while residual > tol` condition would.
    if not residual > tol:
        break

# Optional: persist the raw vector alongside the CSV.
# np.save(os.path.join("polished_data", "page_rank_vector.npy"), page_rank_vector)

# NOTE(review): assumes the rows of people_nodes.csv are in the same order as
# the rows/columns of the adjacency matrix -- confirm.
names = pd.read_csv(os.path.join("polished_data", "people_nodes.csv")).Name.tolist()

# A single descending sort is sufficient; sorting twice by the same key adds
# nothing (the relative order of tied scores is unspecified either way).
pd.DataFrame({"id": names, "PageRank": page_rank_vector}).sort_values(
    by="PageRank", ascending=False
).to_csv(
    os.path.join("polished_data", "people_page_rank.csv"),
    index=False,
    quoting=csv.QUOTE_NONNUMERIC,
)