-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdemo.py
99 lines (71 loc) · 2.37 KB
/
demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# coding: utf-8
# In[ ]:
from __future__ import print_function, unicode_literals
import os
import sys
import codecs
import random
import pickle
from time import time
import numpy as np
import string
import re
import nltk
from nltk import Tree
from collections import defaultdict
from nltk.corpus.reader.bracket_parse import BracketParseCorpusReader
from CYK_parser import CYK_parser
# In[ ]:
# Resolve the run mode and file locations from the command line:
#   <script> MODEL_DIR                         -> interactive shell mode
#   <script> MODEL_DIR INPUT_FILE OUTPUT_FILE  -> batch (file) mode
if len(sys.argv) == 2:  # Interactive mode
    MODE = "shell"
    MODEL_DIR = sys.argv[1]
elif len(sys.argv) == 4:  # Batch mode
    MODE = "f"
    MODEL_DIR = sys.argv[1]
    INPUT_FILE = sys.argv[2]
    OUTPUT_FILE = sys.argv[3]
else:
    print("Parameter error!\n")
    print("Please specify the path to PCFG model, or occasionally input and output files.")
    # Stop here: MODEL_DIR is undefined on this path, so continuing would
    # only crash with a NameError while building the paths below.
    sys.exit(1)

# Pickled model components expected inside MODEL_DIR.
PCFG_UNARY_RULES_DICT_FILE = os.path.join(MODEL_DIR, "PCFG_unary_dict.pkl")
PCFG_BINARY_RULES_DICT_FILE = os.path.join(MODEL_DIR, "PCFG_binary_dict.pkl")
PCFG_POSTAGS_DICT_FILE = os.path.join(MODEL_DIR, "PCFG_postags_dict.pkl")
PCFG_NT_SET_FILE = os.path.join(MODEL_DIR, "PCFG_non_terminals_set.pkl")
PCFG_T_SET_FILE = os.path.join(MODEL_DIR, "PCFG_terminals_set.pkl")
PCFG_POSTAGS_SET_FILE = os.path.join(MODEL_DIR, "PCFG_postags_set.pkl")
# In[ ]:
def _load_pickle(path):
    """Return the object unpickled from the file at *path*.

    The file is opened in binary mode and closed by the ``with`` block,
    so no explicit ``close()`` call is needed.

    NOTE(security): ``pickle.load`` can execute arbitrary code while
    deserializing; only load model files that come from a trusted source.
    """
    with open(path, 'rb') as f:
        return pickle.load(f)


# Load every pickled model component and report the elapsed wall-clock time.
t0 = time()
print(">>> Loading PCFG model parameters...")
unary_rules_dict = _load_pickle(PCFG_UNARY_RULES_DICT_FILE)
binary_rules_dict = _load_pickle(PCFG_BINARY_RULES_DICT_FILE)
postags_dict = _load_pickle(PCFG_POSTAGS_DICT_FILE)
NT_set = _load_pickle(PCFG_NT_SET_FILE)
T_set = _load_pickle(PCFG_T_SET_FILE)
postags_set = _load_pickle(PCFG_POSTAGS_SET_FILE)
print(">>> PCFG model parameters load done in %0.3fs.\n" % (time() - t0))
# In[ ]:
# Build the CYK parser and hand it the loaded model components.
# The arguments are passed positionally, so their order must be kept.
parser = CYK_parser()
parser.initialize(
    NT_set,
    T_set,
    postags_set,
    unary_rules_dict,
    binary_rules_dict,
    postags_dict,
)
# In[ ]:
# Python 2 provides raw_input(); Python 3 renamed it to input().
# Pick whichever exists so the interactive mode runs on both.
try:
    _read_line = raw_input
except NameError:
    _read_line = input

if MODE == "f":
    # Batch mode: parse the whole input file and write results to the output file.
    parser.parse_corpus(input=INPUT_FILE, output=OUTPUT_FILE, verbose=1)
else:
    # Interactive mode: parse one sentence per prompt until an empty line
    # (or end of input) is received.
    while True:
        try:
            sent = _read_line("Enter the sentence to be parsed (empty string to exit): ")
        except EOFError:
            # Ctrl-D / closed stdin: leave the loop instead of crashing.
            break
        if isinstance(sent, bytes):
            # Python 2 returns a byte string; decode it to text as before.
            sent = sent.decode('utf8')
        sent = sent.strip()
        if not sent:
            break
        parser.parse_sent(input=sent, verbose=1)