-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathscrape_word.py
57 lines (51 loc) · 1.76 KB
/
scrape_word.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import copy
import urllib.parse
import urllib.request

from bs4 import BeautifulSoup as Bs
class ScrapeWord:
    """Scrape dictionary entries for a single word from several websites.

    Results are collected in the per-instance ``json`` dictionary, which
    ``scrape`` fills in and returns.
    """

    # Template for the result structure. ``__init__`` deep-copies it so that
    # instances never write into this shared, class-level dictionary.
    json = {
        # Whether text was grabbed for the word from at least one site.
        'state': False,
        'word': '',
        'dic.com': {
            'state': False,
            # Placeholder key kept from the original layout for compatibility.
            '': '',
            # Text of the scraped element (presumably the definition block
            # -- TODO confirm against the live page markup).
            'definition': '',
        },
        'voc.com': {
            'state': False,
            'long': '',
            'short': '',
            'definition': '',
        },
        # TODO: Cambridge (https://dictionary.cambridge.org/dictionary/english/<word>)
        # is not supported yet; its CSS classes are much messier.
    }

    # Site key in ``json`` -> (URL prefix, {CSS class to look up: result key}).
    _SITES = {
        'dic.com': (
            'https://www.dictionary.com/browse/',
            {'css-1o58fj8 e1hk9ate4': 'definition'},
        ),
        'voc.com': (
            'https://www.vocabulary.com/dictionary/',
            {'long': 'long', 'short': 'short', 'definitions': 'definition'},
        ),
    }

    def __init__(self, word):
        """Create a scraper for ``word`` with its own, unshared result dict.

        Args:
            word: The word to look up; stored under ``json['word']``.
        """
        # Deep copy: the nested per-site dicts must not be shared either.
        self.json = copy.deepcopy(type(self).json)
        self.json['word'] = word

    @staticmethod
    def _fetch_soup(url):
        """Download ``url`` and return it parsed with ``html.parser``."""
        # The context manager closes the HTTP response once it is parsed.
        with urllib.request.urlopen(url) as response:
            return Bs(response, 'html.parser')

    def scrape(self):
        """Fetch every supported site and record the text found for the word.

        For each site, every configured CSS class is looked up; the text of
        the first matching element is stored under the corresponding key,
        and a missing element leaves that key as an empty string without
        skipping the remaining fields. A site's ``state`` is True when at
        least one of its fields was found, and the top-level ``state`` is
        True when any site's ``state`` is True.

        Returns:
            dict: ``self.json``, updated in place.

        Raises:
            urllib.error.URLError: Propagated unchanged from ``urlopen``
                (including ``HTTPError`` for non-success responses).
        """
        # Percent-encode the word so spaces and non-ASCII characters yield
        # a valid URL path segment.
        quoted_word = urllib.parse.quote(self.json['word'], safe='')

        for site, (base_url, fields) in self._SITES.items():
            soup = self._fetch_soup(base_url + quoted_word)
            entry = self.json[site]
            found_any = False
            for css_class, key in fields.items():
                element = soup.find(class_=css_class)
                if element is None:
                    entry[key] = ''
                    continue
                entry[key] = element.get_text()
                found_any = True
            entry['state'] = found_any

        self.json['state'] = any(
            self.json[site]['state'] for site in self._SITES
        )
        return self.json