-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathword-search-ii.py
343 lines (325 loc) · 13 KB
/
word-search-ii.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
# 212. Word Search II
# 🔴 Hard
#
# https://leetcode.com/problems/word-search-ii/
#
# Tags: Array - String - Backtracking - Trie - Matrix
import timeit
from typing import List
# 1 call with several tests including LeetCode's 62
# » TrieSolution 0.97636 seconds
# » OptimizedTrieSol 0.00227 seconds
# A fast trie implementation, not as readable as using TrieNodes but
# more performant.
class Trie:
def __init__(self, words: List[str] = None):
self.root = {"length": 0}
for word in words:
self.insert(word)
def __len__(self) -> int:
return self.root["length"]
# Insert a word into this trie object.
def insert(self, word: str) -> None:
current = self.root
for c in word:
if c not in current:
current[c] = {"length": 0}
# There is more complete word under this node.
current["length"] += 1
current = current[c]
current["length"] += 1
current["?"] = True
# Remove a word from this trie object.
def remove(self, word: str) -> None:
current = self.root
current["length"] -= 1
for i, c in enumerate(word):
if c in current:
current[c]["length"] -= 1
if current[c]["length"] < 1:
current.pop(c)
break
else:
current = current[c]
# If we get to the word leaf but the trie node has children.
if i == len(word) - 1 and "?" in current:
current.pop("?")
# Check if a given list of chars is in the trie, it returns 0 if
# not found, 1 if found but not a full word and 2 if a full word.
def contains(self, word: List[str]) -> int:
current = self.root
for c in word:
if c not in current:
return 0
current = current[c]
return 2 if "?" in current else 1
# Use a trie to do quick lookups of the words, since the words are max
# 10 characters long, looking up a word in the trie can be done in O(1).
# Iterate over all the positions in the matrix doing DFS starting at
# that position and moving to its neighbors while the words that we are
# constructing are prefixes found in the trie. When we find a word, add
# it to the result set. Using the trie to do prefix and word lookups
# should be all that is expected in an interview but adding the word
# removal is an optimization that could be discussed.
#
# Time complexity: O(m*n*(4*3^10)) - We iterate over all the positions
# on the board, for each, we start a search for any words that can be
# constructed from this position, the search will initially move to the
# four neighbors, then from there, as long as the characters added are
# found in the trie, the search will expand to the three neighbors of
# the new cell, since the cell we just came from cannot be visited again.
# So, from each position in the matrix, we will potentially do 4*3^10
# calls to DFS, since the max depth is equal to the length of the
# longest word in the trie and that is a max of 10. In theory that is
# still O(1) but it seems significant enough that is worth mentioning it
# on the time complexity.
# Space complexity: O(w*c) - The number of characters in all the words
# in the input, we store them all in the trie, and potentially also in
# the result set even though we would not consider that because it is
# used as the output. The call stack will have a max height of 10.
#
# On LeetCode, the obvious trie and DFS solution needs to be optimized
# to pass the tests and not give TLE, removing the words from the trie
# after finding them in the board is enough as of november 2022.
#
# Runtime: 6123 ms, faster than 20.52%
# Memory Usage: 16.3 MB, less than 58.84%
class TrieSolution:
def findWords(self, board: List[List[str]], words: List[str]) -> List[str]:
NUM_ROWS, NUM_COLS = len(board), len(board[0])
# Store the words found.
res = set()
# Initialize a Trie with the words in the input.
trie = Trie(words)
# Define a function that explores the board from a given start
# position.
def dfs(row: int, col: int, current: List[str]) -> None:
current.append(board[row][col])
board[row][col] = "."
found = trie.contains(current)
# If the current branch is not in the trie, not point on
# exploring any further.
if not found:
board[row][col] = current.pop()
return
# If this is an exact match, add it to the result set.
if found == 2:
w = "".join(current)
res.add(w)
trie.remove(w)
# The four directions where neighbors are found.
dirs = ((0, 1), (0, -1), (1, 0), (-1, 0))
for di, dj in dirs:
i, j = row + di, col + dj
if (
0 <= i < NUM_ROWS
and 0 <= j < NUM_COLS
and board[i][j] != "."
):
dfs(i, j, current)
# Backtrack.
board[row][col] = current.pop()
for i in range(NUM_ROWS):
for j in range(NUM_COLS):
dfs(i, j, [])
# If at any point we find all the words,
# return now.
if len(res) == len(words):
return res
return res
# There are a couple of optimizations that we can do once we see the
# particular tests that LeetCode is using. The tests lead to high
# recursion by having boards with high frequencies of one character and
# words with multiple instances of that character at the beginning, for
# example a board with almost all "a" and a high number of words like
# "aaaaaaaaab". Once we know that, we can improve the solution to
# perform better in the tests with a couple of optimizations. The first
# is by creating a lookup set of all the two character combinations
# found in the board and discarding any words that have a two character
# sequence not in the lookup.
# The second is by reversing all words in which we detect a high
# frequency of one character at the beginning. I think that this
# optimizations are very specific for this particular tests and not
# general enough to be very useful in a real world or interview set up,
# but it was an interesting case of optimization.
#
# Time and space complexity are the same as the previous solution even
# though the runtime is hundreds of times faster.
#
# Runtime: 53 ms, faster than 99.89%
# Memory Usage: 14.5 MB, less than 97.86%
class OptimizedTrieSol:
def findWords(self, board: List[List[str]], words: List[str]) -> List[str]:
NUM_ROWS, NUM_COLS = len(board), len(board[0])
# Remove words for which one of their two letter combinations
# cannot be found in the board.
seq_two = set()
candidates = []
reversed_words = set()
# Find all sequences of two characters in the board. Only right
# and down.
for i in range(NUM_ROWS):
for j in range(NUM_COLS - 1):
seq_two.add(board[i][j] + board[i][j + 1])
for j in range(NUM_COLS):
for i in range(NUM_ROWS - 1):
seq_two.add(board[i][j] + board[i + 1][j])
# Iterate over the words checking if they could be in the board.
for word in words:
in_board = True
for i in range(len(word) - 1):
# For each sequence of two characters in the word, check
# if that sequence or its inverse are in the board.
if (
word[i : i + 2] not in seq_two
and word[i + 1] + word[i] not in seq_two
):
in_board = False
break
if not in_board:
continue
# Reverse words with the same character in the first
# four positions.
if word[:4] == word[0] * 4:
word = word[::-1]
reversed_words.add(word)
candidates.append(word)
NUM_ROWS, NUM_COLS = len(board), len(board[0])
# Store the words found.
res = set()
# Initialize a Trie with the words in the input that could be in
# the board potentially, the candidates, some of them may have
# been reversed to make finding them more efficient.
trie = Trie(candidates)
# Define a function that explores the board from a given start
# position.
def dfs(row: int, col: int, current: List[str]) -> None:
current.append(board[row][col])
board[row][col] = "."
found = trie.contains(current)
# If the current branch is not in the trie, not point on
# exploring any further.
if not found:
board[row][col] = current.pop()
return
# If this is an exact match, add it to the result set.
if found == 2:
w = "".join(current)
if w in reversed_words:
res.add(w[::-1])
reversed_words.remove(w)
else:
res.add(w)
trie.remove(w)
# The four directions where neighbors are found.
dirs = ((0, 1), (0, -1), (1, 0), (-1, 0))
for di, dj in dirs:
i, j = row + di, col + dj
if (
0 <= i < NUM_ROWS
and 0 <= j < NUM_COLS
and board[i][j] != "."
):
dfs(i, j, current)
# Backtrack.
board[row][col] = current.pop()
for i in range(NUM_ROWS):
for j in range(NUM_COLS):
dfs(i, j, [])
return res
def test():
executors = [
TrieSolution,
OptimizedTrieSol,
]
tests = [
[[["a"]], ["a"], ["a"]],
[[["a", "a"]], ["aaa"], []],
[[["a", "b"], ["c", "d"]], ["abcb"], []],
[
[
["o", "a", "a", "n"],
["e", "t", "a", "e"],
["i", "h", "k", "r"],
["i", "f", "l", "v"],
],
["oath", "pea", "eat", "rain", "hklf", "hf"],
["oath", "eat", "hklf", "hf"],
],
[
[
["c", "a", "t", "i"],
["i", "a", "n", "o"],
["l", "p", "p", "a"],
],
["app", "application"],
["app", "application"],
],
[
[
["o", "a", "a", "n"],
["e", "t", "a", "e"],
["i", "h", "k", "r"],
["i", "f", "l", "v"],
],
["oath", "pea", "eat", "rain"],
["eat", "oath"],
],
[
[
["a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a"],
["a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a"],
["a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a"],
["a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a"],
["a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a"],
["a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a"],
["a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a"],
["a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a"],
["a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a"],
["a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a"],
["a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a"],
["a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a"],
],
[
"a",
"aa",
"aaa",
"aaaa",
"aaaaa",
"aaaaaa",
"aaaaaaa",
"aaaaaaaa",
"aaaaaaaaa",
"aaaaaaaaaa",
],
[
"a",
"aa",
"aaa",
"aaaa",
"aaaaa",
"aaaaaa",
"aaaaaaa",
"aaaaaaaa",
"aaaaaaaaa",
"aaaaaaaaaa",
],
],
]
for executor in executors:
start = timeit.default_timer()
for _ in range(1):
for col, t in enumerate(tests):
sol = executor()
result = sol.findWords(t[0], t[1])
exp = set(t[2])
assert result == exp, (
f"\033[93m» {result} <> {exp}\033[91m for"
+ f" test {col} using \033[1m{executor.__name__}"
)
stop = timeit.default_timer()
used = str(round(stop - start, 5))
cols = "{0:20}{1:10}{2:10}"
res = cols.format(executor.__name__, used, "seconds")
print(f"\033[92m» {res}\033[0m")
test()