forked from jaymovaliya/Question_Generation_NLP
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
47 lines (43 loc) · 1.22 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
import operator
import string
def TermFreq(wordDict, text):
    """Return the relative frequency of every word in ``wordDict``.

    Args:
        wordDict: Mapping of word -> number of occurrences.
        text: Sized collection of tokens the counts were taken from. Only its
            length is used, as the denominator of each frequency.

    Returns:
        A new dict mapping each key of ``wordDict`` to
        ``count / len(text)``. When ``text`` is empty every word maps to
        ``0.0`` instead of raising ``ZeroDivisionError``.
    """
    totlen = len(text)
    if totlen == 0:
        # No tokens at all: a frequency is undefined, report 0.0 for each word.
        return {w: 0.0 for w in wordDict}
    # Convert the denominator once rather than on every iteration.
    total = float(totlen)
    return {w: cnt / total for w, cnt in wordDict.items()}
# Read the whole corpus, lower-cased, into memory. The context manager closes
# the file handle as soon as the text has been read.
with open('image.txt', encoding="utf8") as f:
    content = f.read().lower()

# English stop words, plus the typographic apostrophe: it is not an ASCII
# punctuation character, so it is excluded via the stop-word set instead.
stop_words = set(stopwords.words('english'))
stop_words.add("’")

# Word-level tokens feed the frequency table; sentence-level tokens are the
# units that get ranked later.
data = word_tokenize(content)
sentenses = sent_tokenize(content)
print(sentenses)
# Keep only content-bearing tokens: drop stop words and any token that consists
# solely of ASCII punctuation characters. The check is done per character
# because `token in string.punctuation` is a substring test, which lets
# multi-character punctuation tokens such as "...", "--", "``" or "''" through.
punct_chars = set(string.punctuation)
filter_text = [
    w for w in data
    if w not in stop_words and not all(ch in punct_chars for ch in w)
]

# Count occurrences of each remaining token (keys keep first-seen order).
wordDict = {}
for w in filter_text:
    wordDict[w] = wordDict.get(w, 0) + 1
print(wordDict)

# Relative frequency of every token, then the same data ranked from most to
# least frequent.
tf = TermFreq(wordDict, filter_text)
sorted_tf = sorted(tf.items(), key=operator.itemgetter(1), reverse=True)
print(sorted_tf)
# Score each sentence by the mean term frequency of its tokens that appear in
# the frequency table `tf`.
senDict = dict.fromkeys(sentenses, 0)
for s in sentenses:
    length = 0
    weight = 0
    for w in word_tokenize(s):
        if w in tf:
            length += 1
            weight += tf[w]
    # A sentence with no token in `tf` (e.g. only stop words or punctuation)
    # has nothing to average; score it 0.0 instead of dividing by zero.
    senDict[s] = weight / length if length else 0.0
print(senDict)

# Print the sentences from highest to lowest score, each followed by its score.
sorted_sd = sorted(senDict.items(), key=operator.itemgetter(1), reverse=True)
for sen, val in sorted_sd:
    print(sen)
    print(val)