-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdk_learn_new_words_gh.py
61 lines (49 loc) · 1.51 KB
/
dk_learn_new_words_gh.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import string
import sys
from colorama import Fore, Back, Style
from colorama import init
init(autoreset=True)
import urllib.request, urllib.parse, urllib.error
from bs4 import BeautifulSoup
import ssl
# Ignore SSL certificate errors.
# NOTE(review): with hostname checking off and verify_mode set to CERT_NONE,
# the server's certificate is not validated, so HTTPS responses fetched with
# this context are not authenticated. Keep only if that is intentional.
ctx = ssl.create_default_context()
# Order matters: check_hostname has to be disabled first, because the ssl
# module rejects CERT_NONE while hostname checking is still enabled.
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE
# Characters that are replaced by a space before a line is split into words:
# brackets, punctuation, operators, digits and the question mark.
SYMBOLS = '{}()[].,:;+-*/&|<>=~$1234567890?'
# Translation table mapping every character of SYMBOLS to a space, so a line
# is cleaned in a single pass instead of one str.replace() call per symbol.
_SYMBOL_TABLE = str.maketrans(SYMBOLS, " " * len(SYMBOLS))


def strip_symbols(line):
    """Return *line* with every character listed in SYMBOLS turned into a space."""
    return line.translate(_SYMBOL_TABLE)


def read_words(lines):
    """Collect the whitespace-separated words from an iterable of text lines.

    Every line is cleaned with strip_symbols() and the cleaned line is echoed
    to stdout before it is split.

    Args:
        lines: Iterable of strings, for example an open text file.

    Returns:
        A list with all words in input order; duplicates are kept.
    """
    words = []
    for line in lines:
        cleaned = strip_symbols(line)
        print(cleaned)
        # extend() grows the list in place; rebuilding it with `+` for every
        # line would copy all previously collected words each time.
        words.extend(cleaned.split())
    return words


def fetch_entry(word, context):
    """Download the ordnet.dk dictionary page for *word* and parse it.

    Args:
        word: The word to query; it is percent-encoded for the URL.
        context: ssl.SSLContext used for the HTTPS request.

    Returns:
        The page parsed by BeautifulSoup with the 'html.parser' backend.

    Raises:
        Whatever urllib raises when the request fails (e.g.
        urllib.error.URLError); the caller decides how to report it.
    """
    url = "https://ordnet.dk/ddo/ordbog?query=%s" % urllib.parse.quote(word)
    # The context manager closes the HTTP response as soon as the body is read.
    with urllib.request.urlopen(url, context=context) as response:
        html = response.read()
    return BeautifulSoup(html, 'html.parser')


def print_word_types(soup):
    """Print (cyan) each "tekstmedium" span that is a direct child of a
    "definitionBoxTop" div."""
    # TYPE OF WORD
    for box in soup.find_all('div', class_='definitionBoxTop'):
        for label in box.find_all('span', class_='tekstmedium', recursive=False):
            print(Fore.CYAN + label.text)


def print_sound(soup):
    """Print the first "lydskrift" span and the href of every link inside it.

    Prints "not found " (red) when the page has no such span, instead of
    relying on an AttributeError raised by using None.
    """
    # SOUND
    sound = soup.find('span', attrs={'class': 'lydskrift'})
    if sound is None:
        print(Fore.RED + "not found ")
        return
    print(sound.text)
    for link in sound.find_all('a'):
        print(Fore.GREEN + str(link.get('href', "NotFound")))


def lookup_word(word, context):
    """Look *word* up and print its word type and sound information.

    The word itself is printed first (yellow). Any failure while fetching,
    parsing or printing is reported as "not found " (red) so that one bad
    word does not stop the remaining lookups.

    Args:
        word: The word to look up.
        context: ssl.SSLContext used for the HTTPS request.
    """
    print(Fore.YELLOW + word)
    try:
        soup = fetch_entry(word, context)
        print_word_types(soup)
        print_sound(soup)
    except Exception:
        # Deliberate best-effort boundary for a single word. Catching
        # Exception rather than using a bare `except:` keeps Ctrl-C
        # (KeyboardInterrupt) and SystemExit able to stop the script.
        print(Fore.RED + "not found ")


def main():
    """Ask for a text file, then look up every word found in it."""
    fname = input("Enter file name: ")
    # `with` guarantees the file is closed, even if reading raises.
    with open(fname, encoding="utf8") as fhand:
        words = read_words(fhand)
    # `ctx` is the module-level SSL context created earlier in this file.
    for word in words:
        lookup_word(word, ctx)


if __name__ == "__main__":
    main()