-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathloaddata.py
74 lines (56 loc) · 1.97 KB
/
loaddata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
"""
Functions to extract the n-gram and tag mappings from the report type mapper and populate them into the DB
"""
import urllib, json
import util
import re
import configparser
import psycopg2
import psycopg2.extras
def _fetch_report_types(url):
    """Download the JSON document at *url* and return its 'reportTypes' value.

    Raises KeyError if the decoded document has no 'reportTypes' key; errors
    from opening the URL or decoding the JSON propagate unchanged.
    """
    # ``urllib.urlopen`` only exists on Python 2. Resolve the right function
    # locally so this works on either major version without relying on the
    # file-level ``import urllib``.
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2
    response = urlopen(url)
    try:
        payload = response.read()
    finally:
        # Release the underlying socket even if the read fails.
        response.close()
    return json.loads(payload)['reportTypes']


def _load_tag_map(url, select_sql, insert_sql):
    """Insert missing n-gram -> tag rows for every report type served at *url*.

    For each entry of the document's 'reportTypes' that has a non-empty
    'tags' list, the 'documentSubjectMatterDomain' values of its tags are
    joined with '$' and stored against the entry's 'name'. An entry is only
    inserted when *select_sql* finds no existing row for that name; existing
    rows are never updated or deleted.

    Args:
        url: Address of the JSON document to read.
        select_sql: Parameterized query taking the n-gram, used as the
            existence check.
        insert_sql: Parameterized statement taking (n-gram, joined tags).
    """
    # Fetch first, as the original did, so no connection is opened when the
    # download fails.
    report_types = _fetch_report_types(url)
    conn = psycopg2.connect(util.conn_string)
    try:
        cursor = conn.cursor()
        for report_type in report_types:
            tags = report_type['tags']
            if not tags:
                continue
            ngram = report_type['name']
            tag_names = '$'.join(
                tag['documentSubjectMatterDomain'] for tag in tags
            )
            cursor.execute(select_sql, (ngram,))
            # Only existence matters, so one row is enough.
            if cursor.fetchone() is None:
                cursor.execute(insert_sql, (ngram, tag_names))
        # Single commit: either every new row lands or none does.
        conn.commit()
    finally:
        # Always release the connection, including when a query raises.
        conn.close()


def load1Grams():
    """Load the 1-gram tag mappings from ``util.one_gram_url`` into
    ``nlp.unigram_tag_map``, skipping unigrams that already have a row."""
    _load_tag_map(
        util.one_gram_url,
        """SELECT * FROM nlp.unigram_tag_map WHERE unigram = %s;""",
        """INSERT INTO nlp.unigram_tag_map (unigram, tag_name) VALUES (%s,%s);""",
    )


def load2Grams():
    """Load the 2-gram tag mappings from ``util.two_gram_url`` into
    ``nlp.bigram_tag_map``, skipping bigrams that already have a row."""
    _load_tag_map(
        util.two_gram_url,
        """SELECT * FROM nlp.bigram_tag_map WHERE bigram = %s;""",
        """INSERT INTO nlp.bigram_tag_map (bigram, tag_name) VALUES (%s,%s);""",
    )
if __name__ == '__main__':
    # Run as a script: populate the unigram table first, then the bigram one.
    for loader in (load1Grams, load2Grams):
        loader()