-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathwikidata.py
59 lines (48 loc) · 2.07 KB
/
wikidata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import requests
import json
import wptools
def find_values(id, json_repr):
    """Collect every value stored under key ``id`` anywhere in a JSON document.

    Args:
        id: Key to look for in each decoded JSON object.
        json_repr: JSON text to parse.

    Returns:
        A list of the matching values, in the order the decoder finishes
        each object (a nested object is reported before its container).
    """
    matches = []

    def _collect(decoded_obj):
        # Invoked by the decoder once for every JSON object it decodes.
        if id in decoded_obj:
            matches.append(decoded_obj[id])
        return decoded_obj

    # Parsing is done purely for the hook's side effect; the decoded
    # document itself is discarded.
    json.loads(json_repr, object_hook=_collect)
    return matches
def getParamsAnn(id, project='Rome', owner='lancaster2019'):
    """Build the query parameters that request a document's ann.json.

    Args:
        id: Identifier of the document, sent as the ``ids`` parameter.
        project: Project name; defaults to the value this script was
            written for.
        owner: Project owner; defaults to the value this script was
            written for.

    Returns:
        A dict suitable for the ``params`` argument of ``requests.get``.
    """
    return {'project': project, 'owner': owner, 'ids': id, 'output': 'ann.json'}
# --- Service endpoints -------------------------------------------------
tagtogAPIUrl = 'https://www.tagtog.net/-api/documents/v1'
wikipediaUrl = (
    'https://en.wikipedia.org/w/api.php'
    '?action=query&prop=pageprops&ppprop=wikibase_item&redirects=1&format=json'
)
wikidataUrl = (
    'https://www.wikidata.org/w/api.php'
    '?action=wbgetentities&format=json'
)

# --- Wikidata property holding the coordinates --------------------------
coordinatesPropId = 'P625'
coordinatesPropName = 'coordinate location (P625)'

# --- Project-specific identifiers ---------------------------------------
# Both refer back to ids defined in your own project:
# the normalization id and the entity type id, respectively.
normalizationId = 'n_9'
entityTypeId = 'e_2'
# Get your collection of docs
# NOTE: 'YOUR_PASSWORD' is a placeholder and must be replaced before running.
auth = requests.auth.HTTPBasicAuth(username='lancaster2019', password='YOUR_PASSWORD')
paramsTagtog = {'project': 'Rome', 'owner': 'lancaster2019', 'search': '*'}
# requests applies no timeout by default, so an unresponsive server would
# block the script forever; bound the wait explicitly.
responseSearch = requests.get(tagtogAPIUrl, params=paramsTagtog, auth=auth, timeout=30)
# Stop here with the HTTP status (e.g. bad credentials) instead of failing
# later while parsing an error body.
responseSearch.raise_for_status()

# Accumulators filled in by the steps below.
docList = ""
setLocations = set()  # unique location ids found in the annotations
listCoords = []  # coordinate values collected for those locations
listLatLong = []
# Iterate over docs
for doc in responseSearch.json()['docs']:
    # Fetch the ann.json of this document; bound the wait and surface HTTP
    # errors immediately rather than as a parsing failure further down.
    responseAnnJson = requests.get(
        tagtogAPIUrl, params=getParamsAnn(doc['id']), auth=auth, timeout=30
    )
    responseAnnJson.raise_for_status()

    # Iterate over entities and get the location Id
    for entity in responseAnnJson.json()['entities']:
        if entity['classId'] != entityTypeId:
            continue
        # An entity may carry no normalizations at all, or only ones for
        # other normalization ids; neither case should abort the run.
        normalization = (entity.get('normalizations') or {}).get(normalizationId)
        if not normalization:
            continue
        locationId = (normalization.get('source') or {}).get('id')
        # Skip empty and missing ids so only usable values reach the set.
        if locationId:
            setLocations.add(locationId)
# Iterate the locations and get the coordinates
for loc in setLocations:
    page = wptools.page(loc)
    # Restrict the Wikidata lookup to the coordinates property.
    page.wanted_labels([coordinatesPropId])
    page.get_wikidata()
    wikidataClaims = page.data['wikidata']
    if coordinatesPropName in wikidataClaims:
        listCoords.append(wikidataClaims[coordinatesPropName])

# clean data
for coord in listCoords:
    # NOTE(review): the value is presumably a dict per coordinate, but may be
    # a list of dicts when an item has several P625 claims -- confirm against
    # wptools. Handle both, and tolerate entries without an 'altitude' key.
    entries = coord if isinstance(coord, list) else [coord]
    for entry in entries:
        if isinstance(entry, dict):
            entry.pop('altitude', None)

print(listCoords)