-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathquery_api.py
302 lines (251 loc) · 8.21 KB
/
query_api.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
import json
import cPickle, os, sys, time
# Per-developer switch: picks the MySQL driver module and the root password
# to use on the machine this script runs on.
__author__ = "hunter"
if __author__ == "hunter":
    # MySQLdb driver, bound to the common name `mysql` used by init_mysql().
    import MySQLdb as mysql
    pwd = "1234"
    # NOTE(review): fpath is only defined in this branch and is not referenced
    # anywhere else in the visible code -- confirm whether it is still needed.
    fpath = "C:/Users/t-honlin/Desktop/wikidata.json"
else:
    # mysql.connector driver, bound to the same `mysql` name.
    import mysql.connector as mysql
    pwd = "listen"
# Database (schema) name passed to mysql.connect() in init_mysql().
dbname = "wikidata_simplified2"
def init_mysql():
    """Open the module-wide MySQL connection and cursor.

    Sets the globals ``db`` and ``cur`` that every query helper in this
    module relies on.  The work is done in two steps:

    1. connect once, raise the server-wide packet limits, and disconnect;
    2. connect again so the new limits apply, then configure the session.
    """
    global cur, db
    # First extend the max_packet size.
    db = mysql.connect(host="localhost", user="root", passwd=pwd, db=dbname,
                       charset="utf8")
    cur = db.cursor()
    cur.execute("SET GLOBAL net_buffer_length = %s" % 1000000)
    cur.execute("SET GLOBAL max_allowed_packet = 1000000000")
    db.commit()
    # The bootstrap connection has served its purpose; release it instead of
    # leaving it open behind the rebound globals.
    cur.close()
    db.close()
    # The command is on for next connection.
    db = mysql.connect(host="localhost", user="root", passwd=pwd, db=dbname,
                       charset="utf8")
    # The cursor must come from the NEW connection: otherwise the session
    # settings below (and every later query) would still run on the old one.
    cur = db.cursor()
    cur.execute("SET foreign_key_checks = 0")
    cur.execute("SET NAMES 'utf8'")
    db.commit()
def get_valid_properties(fname):
    """Load the property table from a tab-separated UTF-8 text file.

    Each non-blank line contributes one entry: the first column is the key
    (the property id) and the last column is the value.  Columns in between
    are ignored.  Blank lines are skipped.

    Returns a dict mapping first column -> last column (unicode text).
    """
    # io.open decodes while reading and behaves the same on Python 2 and 3.
    import io

    prop_dict = {}
    with io.open(fname, encoding="utf8") as f:
        for line in f:
            line = line.strip()
            if not line:
                # A blank line would otherwise register a bogus "" key.
                continue
            fields = line.split("\t")
            prop_dict[fields[0]] = fields[-1]
    return prop_dict
def query_id_from_name(name):
    """Return the eid of the first Entity row whose label equals ``name``.

    Only one row is fetched even if several entities share the label.
    When no row matches, ``fetchone()`` yields None and the subscript below
    raises TypeError.
    """
    # Debug trace; prints the same text the query used to be built as.
    print("select eid from Entity where label = \"%s\"" % name)
    # The value is bound by the driver, so quotes inside ``name`` can no
    # longer break (or inject into) the statement.
    cur.execute("select eid from Entity where label = %s", (name,))
    data = cur.fetchone()
    return data[0]
def query_id_from_alias(alias):
    """Return the eid of the first Alias row whose alias equals ``alias``.

    Only one row is fetched even if several rows match.  When no row
    matches, ``fetchone()`` yields None and the subscript below raises
    TypeError.
    """
    # Debug trace; prints the same text the query used to be built as.
    print("select eid from Alias where alias = \"%s\"" % alias)
    # Bind the value through the driver so embedded quotes are harmless.
    cur.execute("select eid from Alias where alias = %s", (alias,))
    data = cur.fetchone()
    return data[0]
def query_name_from_id(id):
    """Return the label of the first Entity row whose eid equals ``id``.

    When no row matches, ``fetchone()`` yields None and the subscript below
    raises TypeError.
    """
    # Debug trace; prints the same text the query used to be built as.
    print("select label from Entity where eid = \"%s\"" % id)
    # Bind the value through the driver so embedded quotes are harmless.
    cur.execute("select label from Entity where eid = %s", (id,))
    data = cur.fetchone()
    return data[0]
def query_description_from_id(id):
    """Return the description of the first Entity row whose eid equals ``id``.

    When no row matches, ``fetchone()`` yields None and the subscript below
    raises TypeError.
    """
    # Debug trace; prints the same text the query used to be built as.
    print("select description from Entity where eid = \"%s\"" % id)
    # Bind the value through the driver so embedded quotes are harmless.
    cur.execute("select description from Entity where eid = %s", (id,))
    data = cur.fetchone()
    return data[0]
def query_entities_from_name(name):
    """Return every Entity row whose label equals ``name``.

    The result is a list with one ``[eid, label, description]`` list per
    matching row; it is empty when nothing matches.
    """
    # Debug trace; prints the same text the query used to be built as.
    print("select eid,label,description from Entity where label = \"%s\"" % name)
    # Bind the value through the driver so embedded quotes are harmless.
    cur.execute("select eid,label,description from Entity where label = %s",
                (name,))
    # Rows are converted to lists, as before.
    return [list(row) for row in cur.fetchall()]
def query_entities_from_id(id):
    """Return every Entity row whose eid equals ``id``.

    The result is a list with one ``[eid, label, description]`` list per
    matching row; it is empty when nothing matches.
    """
    # Debug trace; prints the same text the query used to be built as.
    print("select eid,label,description from Entity where eid = \"%s\"" % id)
    # Bind the value through the driver so embedded quotes are harmless.
    cur.execute("select eid,label,description from Entity where eid = %s",
                (id,))
    # Rows are converted to lists, as before.
    return [list(row) for row in cur.fetchall()]
def query_preceding_categories(eid, detail = True):
    """Return the Preced.weid values recorded for ``eid``.

    detail -- bool.  When false, the raw rows from the Preced query are
              returned as fetched (one 1-column row per weid).  When true,
              a list of strings is returned: ``"<weid> <label>"`` if the
              weid has a row in Entity, otherwise just ``"<weid>"``.
    """
    cur.execute("select weid from Preced where eid = %s", (eid,))
    weid_set = cur.fetchall()
    if not detail:
        return weid_set

    query_results = []
    for row in weid_set:
        weid = row[0]
        # I plan to create index of label.
        cur.execute("select label from Entity where eid = %s", (weid,))
        res = cur.fetchall()
        if res:
            # Several rows may match; only the first label is used.
            query_results.append(weid + " " + res[0][0])
        else:
            query_results.append(weid)
    return query_results
def query_entity_cooccured(eid):
    """Return the distinct ids paired with ``eid`` in the Correlation table.

    The pair is looked up in both directions (``eid`` stored in either the
    eid or the weid column).  The result is a de-duplicated list in no
    particular order.
    """
    # Rows where ``eid`` is in the eid column.
    cur.execute("select distinct(weid) from Correlation where eid = %s",
                (eid,))
    partners = set(row[0] for row in cur.fetchall())
    # Rows where ``eid`` is in the weid column.
    cur.execute("select distinct(eid) from Correlation where weid = %s",
                (eid,))
    partners.update(row[0] for row in cur.fetchall())
    return list(partners)
def query_statements_properties(eid):
    """Return every claim of ``eid`` together with its qualifiers.

    One list is produced per Claim row:
    ``[property_name, value, qualifier_name, qualifier_value, ...]`` with
    one name/value pair appended per qualifier mapped to the claim.
    Property ids are translated through the module-level ``prop_dict``
    (a KeyError propagates for an id it does not contain); values are
    passed through convert_entity_to_name().
    """
    global prop_dict
    cur.execute("select cid,property,value from Claim where eid = %s", (eid,))
    claims = cur.fetchall()
    query_results = []
    for cid, p, v in claims:
        pname = prop_dict[p]
        cur.execute("select qid from Cqmapping where cid = %s", (cid,))
        # Fetch all qids up front: the shared cursor is reused below.
        qid_rows = cur.fetchall()
        statement = [pname, convert_entity_to_name(v)]
        for qid_row in qid_rows:
            cur.execute("select property,value from Qualifier where qid = %s",
                        (qid_row[0],))
            qualifier = cur.fetchone()
            if qualifier is None:
                # Mapping points at a qualifier that does not exist; skip it
                # rather than crash while unpacking None.
                continue
            qp, qv = qualifier
            statement.extend([prop_dict[qp], convert_entity_to_name(qv)])
        query_results.append(statement)
    return query_results
def convert_entity_to_name(eid):
    """Replace an entity id by its label where possible.

    Any value whose first character is 'Q' is treated as an entity id and
    looked up in Entity; the first matching label is returned, or
    ``eid + " (Not found) "`` when there is no such row.  Every other value
    (including an empty or None value) is returned unchanged.
    """
    # took it as the entity. =.=
    # Guard first: indexing an empty or None value would raise.
    if not eid or eid[0] != 'Q':
        return eid
    cur.execute("select label from Entity where eid = %s", (eid,))
    res = cur.fetchall()
    if res:
        return res[0][0]  # the first one and the first element
    return eid+" (Not found) "
def tree_search(eid):
    """Return the eids of htree rows that enclose the row(s) of ``eid``.

    For every (lft, rgt) pair stored for ``eid`` in htree, selects the rows
    with ``lft`` smaller and ``rgt`` larger than that pair, and returns the
    collected eids as a flat list.
    """
    cur.execute("select lft,rgt from htree where eid = %s", (eid,))
    res = cur.fetchall()
    print(res)
    sql = "select eid from htree where lft < %s and rgt > %s"
    result = []
    for lft, rgt in res:
        # Same "lft rgt" trace line as before.
        print("%s %s" % (lft, rgt))
        # The bounds have to be bound to the placeholders; executing the
        # bare template sends a literal "%s" to the server.
        cur.execute(sql, (lft, rgt))
        # extend() works whether fetchall() returns a list or a tuple.
        result.extend(row[0] for row in cur.fetchall())
    return result
def recursive_search(eid, eid_set, _path=None):
    """Append to ``eid_set`` every weid reachable from ``eid`` via Preced.

    The walk is depth-first and pre-order: each weid is appended before its
    own Preced rows are followed.  A weid reachable along several paths is
    appended once per path.

    eid     -- id to start from.
    eid_set -- list that is extended in place; nothing is returned.
    _path   -- internal: ids on the current chain, used to stop at cycles.
               Callers should not pass it.
    """
    if _path is None:
        _path = set()
    _path.add(eid)
    cur.execute("select weid from Preced where eid = %s", (eid,))
    # fetchall() before recursing: the shared cursor is reused further down.
    for row in cur.fetchall():
        weid = row[0]
        if weid in _path:
            # Following this edge would loop forever; skip it.
            continue
        eid_set.append(weid)
        recursive_search(weid, eid_set, _path)
    _path.discard(eid)
# Query1: given a name, find corresponding entities.
def find_entity(ename):
    """Look up every entity whose label is ``ename``.

    Thin wrapper: whatever query_entities_from_name(ename) produces is
    handed back untouched.
    """
    matches = query_entities_from_name(ename)
    return matches
# Query2: given a eid, find corresponding tree structure.
def find_tree(eid):
    """Return the Entity rows for everything reachable from ``eid``.

    The ids are gathered with recursive_search() and each one is resolved
    with query_entities_from_id(); the rows are concatenated, in visiting
    order, into one list that is returned.
    """
    # recursive search
    eid_set = []
    recursive_search(eid, eid_set)
    # tree search (alternative, currently unused)
    # eid_set = tree_search(eid)
    data = []
    for found_id in eid_set:
        data.extend(query_entities_from_id(found_id))
    # The result must be returned; without this the function yields None.
    return data
# Query3: given an eid, find all co-occurring entities.
def find_cooccur(eid):
    """Return the Entity rows of every id that co-occurs with ``eid``.

    The ids come from query_entity_cooccured() and each one is resolved
    with query_entities_from_id(); the rows are concatenated into a single
    list.
    """
    data = []
    for other_id in query_entity_cooccured(eid):
        # extend() avoids rebuilding the whole list on every iteration.
        data.extend(query_entities_from_id(other_id))
    return data
# Query4: given a eid, find all its statement and properties.
def find_statements(eid):
    """Collect the statements (claims plus qualifiers) stored for ``eid``.

    Thin wrapper: the result of query_statements_properties(eid) is handed
    back untouched.
    """
    statements = query_statements_properties(eid)
    return statements
# Add: given the name, only find its id. If multiple, only choose the first one.
def find_entityid(ename):
    """Resolve the label ``ename`` to an entity id.

    Thin wrapper around query_id_from_name(); its result is returned as is.
    """
    entity_id = query_id_from_name(ename)
    return entity_id
# Add: given a name, find its entity id based on Alias! Only return the first one.
def find_aliasid(alias):
    """Resolve ``alias`` to an entity id through the alias lookup.

    Thin wrapper around query_id_from_alias(); its result is returned as is.
    """
    entity_id = query_id_from_alias(alias)
    return entity_id
# Add: given a eid, find its name.
def find_name(eid):
    """Fetch the label stored for the entity id ``eid``.

    Thin wrapper around query_name_from_id(); its result is returned as is.
    """
    label = query_name_from_id(eid)
    return label
# Add: given a eid, find its description.
def find_description(eid):
    """Fetch the description stored for the entity id ``eid``.

    Thin wrapper around query_description_from_id(); its result is returned
    as is.
    """
    description = query_description_from_id(eid)
    return description
# Add: given a eid, and prop_id, find the corresponding datavalue.
# if the datavalue is eid, select its label additionally.
# def find_claim(eid, pid):
# claim = "claim"
# prompt = ""
# return claim
# init_mysql()
# # query 1
# print "query 1"
# start = time.time()
# print query_entities_from_name('Aleksandr Chudakov')
# print time.time() - start
# # query 2
# print "query 2"
# start = time.time()
# print query_preceding_categories("Q1372409");
# print time.time() - start
# # query 3
# print "query 3"
# start = time.time()
# print query_entity_cooccured("Q1372409")
# print time.time() - start
# prop_dict = get_valid_properties("./properties.txt")
# # query 4
# print "query 4"
# start = time.time()
# print query_statements_properties("Q1372409")
# print time.time() - start