-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspjson.py
75 lines (66 loc) · 1.86 KB
/
spjson.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import json
import sqlite3
# Export pages and the links between them from spider.sqlite into spider.js
# as a JavaScript assignment of the form:
#   spiderJson = {"nodes":[...], "links":[...]};
# Each node carries its inbound-link count ("weight"), its page rank rescaled
# to the range 0..19 ("rank"), its database id and its url.  Each link refers
# to nodes by their position in the "nodes" array.
conn = sqlite3.connect("spider.sqlite")
cur = conn.cursor()
try:
    print("Creating JSON output on spider.js...")
    howmany = int(input("How many nodes? "))

    cur.execute(
        """SELECT COUNT(from_id) AS inbound, old_rank, new_rank, id, url
        FROM Pages JOIN Links ON Pages.id = Links.to_id
        WHERE html IS NOT NULL AND ERROR IS NULL
        GROUP BY id ORDER BY id,inbound"""
    )

    # Collect at most `howmany` rows.  The limit is tested *before* appending
    # so the export never contains one node more than the user asked for.
    nodes = []
    for row in cur:
        if len(nodes) >= howmany:
            break
        nodes.append(row)

    # new_rank (column 2) is what gets rescaled; find its range over the
    # selected nodes.
    page_ranks = [row[2] for row in nodes]
    minrank = min(page_ranks) if page_ranks else None
    maxrank = max(page_ranks) if page_ranks else None

    # No rows, or every rank identical: there is nothing to scale against
    # (and the division below would be by zero).
    if maxrank == minrank or maxrank is None or minrank is None:
        print("Error - please run sprank.py to compute page rank")
        raise SystemExit

    rank_span = maxrank - minrank

    # Maps a page id to its index in the emitted "nodes" array; the links
    # section needs those indices for "source" and "target".
    index_by_id = {}
    node_entries = []
    for position, (inbound, _old_rank, new_rank, page_id, url) in enumerate(nodes):
        scaled_rank = 19 * ((new_rank - minrank) / rank_span)
        node_entries.append(
            '{"weight":' + str(inbound)
            + ',"rank":' + str(scaled_rank) + ","
            + ' "id":' + str(page_id)
            # json.dumps quotes the url and escapes '"', '\\' and non-ASCII
            # characters so an unusual url cannot break the generated file.
            + ', "url":' + json.dumps(url) + "}"
        )
        index_by_id[page_id] = position

    # Keep only links whose both endpoints were exported as nodes.
    cur.execute("""SELECT DISTINCT from_id, to_id FROM Links""")
    link_entries = []
    for from_id, to_id in cur:
        if from_id not in index_by_id or to_id not in index_by_id:
            continue
        link_entries.append(
            '{"source":' + str(index_by_id[from_id])
            + ',"target":' + str(index_by_id[to_id])
            + ',"value":3}'
        )

    # The output file is opened only now, after every query and check has
    # succeeded, so a failed run does not truncate an existing spider.js.
    with open("spider.js", "w", encoding="utf-8") as fhand:
        fhand.write('spiderJson = {"nodes":[\n')
        fhand.write(",\n".join(node_entries))
        fhand.write("],\n")
        fhand.write('"links":[\n')
        fhand.write(",\n".join(link_entries))
        fhand.write("]};")
finally:
    # Release the database handles on every exit path, including SystemExit.
    cur.close()
    conn.close()

print("Open force.html in a browser to view the visualization")