-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcombiner.py
131 lines (95 loc) · 3.64 KB
/
combiner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import os
import argparse
import sqlite3
import xml.etree.ElementTree as ElementTree
from typing import Set, Dict, List
from DictionaryEntry import Entry, JapaneseEntry, EnglishEntry, KanjiEntry, Sentence
from DictionaryOutput import DictionaryOutput
def get_stats(pages):
    """Print a per-type tally of the generated dictionary pages.

    Every item of ``pages`` is classified with ``isinstance`` checks tried in
    the order KanjiEntry, JapaneseEntry, EnglishEntry; anything that matches
    none of them is counted as "other". Kanji pages whose ``image`` attribute
    is truthy are additionally counted as having stroke order.

    Nothing is returned; the summary is written to stdout.
    """
    counts = dict.fromkeys(
        ("kanji", "english", "japanese", "other", "kanji_image"), 0
    )

    for page in pages:
        if isinstance(page, KanjiEntry):
            counts["kanji"] += 1
            # The image flag is only inspected on kanji pages.
            if page.image:
                counts["kanji_image"] += 1
            continue

        if isinstance(page, JapaneseEntry):
            bucket = "japanese"
        elif isinstance(page, EnglishEntry):
            bucket = "english"
        else:
            bucket = "other"
        counts[bucket] += 1

    summary = "\n ".join([
        "Created:",
        "{} kanji pages ({} with stroke order)".format(counts["kanji"], counts["kanji_image"]),
        "{} japanese entries".format(counts["japanese"]),
        "{} english entries".format(counts["english"]),
        "{} other entries".format(counts["other"]),
    ])
    print(summary)
def get_arguments():
    """Parse the command-line arguments of the combiner script.

    Returns:
        An ``argparse.Namespace`` with the string attributes ``dictionary``,
        ``kanji``, ``english_wordlist`` (positionals) and ``o`` (the value
        given to ``-o``).

    Raises:
        SystemExit: raised by argparse when a positional argument or ``-o``
            is missing, or when ``-h`` is requested.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("dictionary", type=str, help="dictionary XML file to read")
    parser.add_argument("kanji", type=str, help="kanji XML file to read")
    # NOTE(review): english_wordlist is accepted here but main() never reads
    # it; it is kept so existing invocations continue to parse.
    parser.add_argument("english_wordlist", type=str)
    # main() hands args.o directly to ElementTree.write(). Without a value it
    # would be None and the run would fail only after every page had been
    # built, so reject a missing -o up front.
    parser.add_argument(
        "-o",
        type=str,
        required=True,
        help="file the generated dictionary XML is written to",
    )
    return parser.parse_args()
def create_kanji_pages(kanji_path: str, kanji_images: Set[str]) -> List[KanjiEntry]:
    """Build one KanjiEntry for every child element of the kanji XML file.

    Args:
        kanji_path: Path of the XML file to parse.
        kanji_images: Set passed unchanged to each KanjiEntry constructor.

    Returns:
        The created entries, in the order the elements appear under the
        document root.
    """
    kanji_root = ElementTree.parse(kanji_path).getroot()
    return [KanjiEntry(element, kanji_images) for element in kanji_root]
def create_japanese_pages(dict_path: str) -> List[JapaneseEntry]:
    """Build the Japanese pages from the dictionary XML file.

    A JapaneseEntry is created for every child element of the document root
    and kept only when its ``is_worth_adding()`` returns a truthy value.

    Page ids are made unique: when an entry's ``page_id`` is already taken,
    it is rewritten to ``"<page_id>-<n>"`` using the smallest ``n >= 0`` that
    is still free.

    Args:
        dict_path: Path of the XML file to parse.

    Returns:
        The kept entries, in the order they were added.
    """
    dictionary_root = ElementTree.parse(dict_path).getroot()

    # Keyed by page id so that collisions are detected with a dict lookup.
    result: Dict[str, JapaneseEntry] = {}
    for element in dictionary_root:
        new_entry = JapaneseEntry(element)
        if not new_entry.is_worth_adding():
            continue

        if new_entry.page_id in result:
            # Take the first free suffix and stop. The previous loop had no
            # early exit: it always scanned 0..999, ended on the *highest*
            # free suffix, and silently overwrote an existing page once all
            # 1000 suffixes were in use.
            base_id = new_entry.page_id
            suffix = 0
            while f"{base_id}-{suffix}" in result:
                suffix += 1
            new_entry.page_id = f"{base_id}-{suffix}"

        result[new_entry.page_id] = new_entry

    return list(result.values())
def create_english_pages() -> List[EnglishEntry]:
    """Build the English pages from the ``EnglishTranslations`` table.

    Reads every row of ``EnglishTranslations`` from the SQLite database at
    ``output/dictionary.db`` (relative to the current working directory) and
    groups the rows into one EnglishEntry per distinct first-column value.

    Each row is unpacked positionally into six values, so the result depends
    on the table's column order.
    NOTE(review): the order is assumed to be (english word, explanation,
    japanese, context, part of speech, sense) — confirm against the schema.

    Returns:
        One EnglishEntry per distinct English word, in first-seen order.
    """
    db = sqlite3.connect("output/dictionary.db")
    try:
        cursor = db.cursor()
        rows = cursor.execute("SELECT * FROM EnglishTranslations").fetchall()
    finally:
        # The connection was previously left open for the rest of the run.
        db.close()

    result: Dict[str, EnglishEntry] = {}
    for en, expl, jp, context, pos, _sense in rows:
        if en not in result:
            result[en] = EnglishEntry(en)

        parts_of_speech = pos.split(", ")
        if expl is not None:
            # An explicit explanation replaces the comma-separated context.
            result[en].add_translation(jp, [expl], parts_of_speech)
        else:
            result[en].add_translation(jp, context.split(", "), parts_of_speech)

    return list(result.values())
def main():
    """Build every dictionary page and write the combined XML output.

    Steps:
      1. Parse the command-line arguments.
      2. Collect the image file names from ``./build/OtherResources/Images``.
      3. Create the kanji, Japanese and English pages.
      4. Wrap them in a DictionaryOutput and write its root element to the
         file given with ``-o`` as UTF-8 XML with an XML declaration.
      5. Print a summary of what was created.
    """
    args = get_arguments()

    # File names in the images directory that contain ".svg"; handed to the
    # kanji pages.
    image_set = {
        name
        for name in os.listdir("./build/OtherResources/Images")
        if ".svg" in name
    }

    # All pages of the dictionary. (This used to be initialised as an empty,
    # dict-annotated placeholder that was immediately replaced by this set.)
    pages: Set[Entry] = {
        *create_kanji_pages(args.kanji, image_set),
        *create_japanese_pages(args.dictionary),
        *create_english_pages(),
    }

    dictionary = DictionaryOutput(pages)
    tree = ElementTree.ElementTree(dictionary.root)
    tree.write(args.o, encoding="UTF-8", xml_declaration=True)

    get_stats(pages)


if __name__ == "__main__":
    main()