-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwdqquery.py
37 lines (36 loc) · 1.03 KB
/
wdqquery.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#!/usr/bin/python
# A Python script for fetching Wikidata IDs for professions or nationalities
# using the SPARQL endpoint
from SPARQLWrapper import SPARQLWrapper, JSON
import pdb
# Resolve each profession name listed in data/professions to a Wikidata item
# and write one "name<TAB>wikidata id<TAB>fbid" row per input line to
# data/professions.ids. Rows already resolved by an earlier run are reused
# instead of being queried again.

# Query template: items whose English label equals the (lower-cased) name and
# that are an instance of (P31) Q28640.
QUERY_TEMPLATE = """
SELECT ?item WHERE{
?item rdfs:label \"%s\"@en.
?item wdt:P31 wd:Q28640.
}"""

sparql = SPARQLWrapper('https://query.wikidata.org/bigdata/namespace/wdq/sparql')
# The return format does not depend on the query, so set it once up front
# rather than on every loop iteration.
sparql.setReturnFormat(JSON)

# Cache of earlier results: profession name -> [wikidata id, fbid].
pids = {}
try:
    with open('data/professions.ids') as fin:
        for line in fin:
            # Remove only the line terminator. A full strip() would also eat
            # the trailing tab of a row whose last column is empty (which is
            # every row this script writes), leaving two fields and making
            # the cache useless.
            fields = line.rstrip('\r\n').split('\t')
            # Rows with both id columns empty are failed lookups; leave them
            # out of the cache so they are retried on this run.
            if len(fields) == 3 and (fields[1] or fields[2]):
                pids[fields[0]] = [fields[1], fields[2]]
except IOError as e:
    # No readable cache (e.g. first run): start empty and query everything.
    print(e)

# The input file is opened first so that a missing data/professions fails
# before the ids file is truncated for writing.
with open('data/professions') as fin, open('data/professions.ids', 'w') as fout:
    for line in fin:
        pr = line.strip()
        idval = ''
        # The query does not select an fbid, so this column stays empty for
        # freshly looked-up rows; cached rows keep whatever they had.
        fbid = ''
        if pr in pids:
            idval, fbid = pids[pr]
        elif pr:
            # Escape backslashes and double quotes so the name cannot break
            # out of the quoted SPARQL string literal.
            label = pr.lower().replace('\\', '\\\\').replace('"', '\\"')
            sparql.setQuery(QUERY_TEMPLATE % label)
            try:
                bindings = sparql.query().convert()['results']['bindings']
                # An empty bindings list simply means "no match".
                if bindings:
                    idval = bindings[0]['item']['value']
            except Exception as e:
                # Best effort: a failed request or malformed response must
                # not abort the whole run; the row is written with empty ids.
                print('lookup failed for %r: %s' % (pr, e))
        # Blank input lines fall through to here without a query and still
        # produce a row, keeping output lines aligned with input lines.
        fout.write(pr + '\t' + idval + '\t' + fbid + '\n')