-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmake_heatmap_data.py
46 lines (41 loc) · 1.29 KB
/
make_heatmap_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import json
from acdh_tei_pyutils.tei import TeiReader
from collections import Counter
from config import MASTER_ENRICHED
# Build the JSON data set behind the place heatmap.
#
# Every tei:place element found in the MASTER_ENRICHED document counts as one
# occurrence. Occurrences are grouped by (name, coordinates, image link),
# groups whose size falls outside MIN_COUNT..MAX_COUNT are dropped, and the
# remaining groups are written to ./html/data/heatmap.json, largest first.

# Groups with fewer than MIN_COUNT or more than MAX_COUNT occurrences are left
# out of the output.
# NOTE(review): presumably this keeps one-off mentions and overwhelmingly
# frequent places from dominating the map -- confirm the intended bounds.
MIN_COUNT = 2
MAX_COUNT = 1000

main_file = MASTER_ENRICHED
ns = {"tei": "http://www.tei-c.org/ns/1.0"}
doc = TeiReader(main_file)
places = doc.any_xpath(".//tei:place")
print(len(places))

# One (name, coords, img) tuple per usable place element. Tuples are used as
# grouping keys instead of a "__"-joined string so that a name or link target
# which itself contains "__" cannot break the key apart later.
items = []
for x in places:
    # A place without any placeName element is treated like one whose
    # placeName has no text: its name is None.
    names = x.xpath(".//tei:placeName", namespaces=ns)
    name = names[0].text if names else None

    geos = x.xpath(".//tei:geo", namespaces=ns)
    if not geos:
        print(f"looks like place {name} has no coords")
        continue
    coords = geos[0].text

    # The image link is optional: a missing link element and a link element
    # without a target attribute both yield an empty string.
    links = x.xpath(".//tei:link", namespaces=ns)
    img = links[0].get("target", "") if links else ""

    if name is None or coords is None:
        # Empty placeName or geo element: report the record and skip it.
        print([name, coords, img])
        continue
    items.append((name, coords, img))

# Number of occurrences per distinct (name, coords, img) tuple, in order of
# first appearance.
data_raw = Counter(items)

data = []
for (name, coords, _img), value in data_raw.items():
    if value > MAX_COUNT or value < MIN_COUNT:
        continue
    try:
        # The first two whitespace-separated tokens of the geo text are read
        # as latitude and longitude, in that order.
        lat, lng = coords.split()[0:2]
        item = {"name": name, "lat": float(lat), "lng": float(lng), "count": value}
    except ValueError:
        # Fewer than two tokens, or tokens that are not numbers: skip the group.
        continue
    data.append(item)

# Most frequent places first; sorted() is stable, so ties keep the order in
# which the places were first seen.
newlist = sorted(data, key=lambda d: d["count"], reverse=True)

# ensure_ascii=False writes non-ASCII place names verbatim, so the file
# encoding is pinned to UTF-8 rather than left to the platform default.
with open("./html/data/heatmap.json", "w", encoding="utf-8") as f:
    json.dump(newlist, f, ensure_ascii=False)