-
Notifications
You must be signed in to change notification settings - Fork 39
/
Copy pathtrie_insert_search_delete.py
187 lines (155 loc) · 5.37 KB
/
trie_insert_search_delete.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
#!/usr/bin/python
# Date: 2018-09-30
#
# Description:
# Implement prefix trie and perform all basic operations - Insert, Search and
# Delete.
#
# Approach:
# Each node of trie can have max supported alphabets children so we can have a
# array of 26(if only small case chars) whose index can be mapped to 26
# characters. Also we take a flag to store if this node marks an end of word or
# not.
#
# Insert
# ------
# Check if node exist or not, follow its children for next characters in trie
# otherwise add a new node to trie to accommodate new word. When all chars are
# inserted in trie for a word, mark is_end_of_word as True.
#
# Search
# ------
# Similar to insert operation, when we have found whole word, check if end of
# word is set or not, if set whole word exist in trie otherwise not.
#
# Delete
# ------
# Recursive approach followed and nodes deleted in bottom up manner. Lot of
# cases to be handled, explained in delete() below.
#
# Reference:
# https://www.geeksforgeeks.org/trie-insert-and-search/
# https://www.geeksforgeeks.org/trie-delete/
#
# NOTE: Trie can also be implemented using python dictionary instead of list
# of 26 characters, refer: `Level-3/multi_search.py`
#
# Complexity:
# Space to store trie - O(ALPHABET_SET*L*N)
# Insert - O(L)
# Search - O(L)
# Delete - O(L)
#
# ALPHABET_SET - Size of alphabet set, 26 if trie has all lower case chars
# N - Number of words in trie
# L - Length of a word
#
# Limitation:
# This trie is useful in searching words separated with delimeter like
# dictionary where each word is independent of each other. In cases where full
# text search is required in a document this is not useful. We can use suffix
# tries for that.
#
# Another problem is lot of memory is wasted in tries(in suffix tris also), to
# reduce that, we can use compressed tries.
ALPHABET_SIZE = 26
class TrieNode:
"""Hold data required for a single trie node."""
def __init__(self):
"""Initializes a new trie node."""
self.children = [None for i in range(ALPHABET_SIZE)]
self.is_end_of_word = False
def is_leaf_node(self):
"""
Checks if node is leaf or not. If a node don't have any children it is a
leaf node.
"""
for ch in self.children:
if ch:
return False
return True
class Trie:
"""Implements trie with all basic operations."""
def __init__(self):
"""Initializes a new trie."""
self.root = self.get_new_node()
def get_new_node(self):
"""Initializes a new trie node."""
return TrieNode()
def _char_to_array_index(self, ch):
"""Converts given character to array index of size 26 - between 0 to 25."""
return ord(ch) - ord('a')
def insert(self, word):
"""Inserts a new word into trie if not already present."""
ptr = self.root
for ch in word:
index = self._char_to_array_index(ch)
# If current character is not present in trie at required place, create
# a new node and point to that node.
if not ptr.children[index]:
ptr.children[index] = self.get_new_node()
ptr = ptr.children[index]
ptr.is_end_of_word = True
def search(self, key):
"""Searches for a complete key in trie."""
ptr = self.root
for ch in key:
index = self._char_to_array_index(ch)
if not ptr.children[index]:
return False
ptr = ptr.children[index]
return ptr.is_end_of_word
def delete_utils(self, curr_node, key, level, length):
"""Utility function to delete a key from trie.
Args:
curr_node: Reference to current node.
key: Key/word to be deleted from trie.
level: Level till when search is done in trie, root is at level 0.
length: Length of key.
"""
if not curr_node:
print('Deletion failed, %r not found' % key)
return None
# We have found key in trie
if level == length:
# If this is end of word, mark that false as we have to delete this key
# so can't be end of word now
if curr_node.is_end_of_word:
print('%r found, deleting' % key)
curr_node.is_end_of_word = False
# If this is a leaf node, have no children - this should be deleted
return curr_node.is_leaf_node()
else: # Search recursively and delete node(s) if required
index = self._char_to_array_index(key[level])
if self.delete_utils(curr_node.children[index], key, level + 1, length):
del curr_node.children[index] # This is the last node, delete it
# If current node is not an end of word and it doesn't has any children
# then this node should be deleted.
return not curr_node.is_end_of_word and curr_node.is_leaf_node()
return False
def delete(self, key):
"""Deletes a word/key from trie."""
if key:
root = self.root
return self.delete_utils(root, key, 0, len(key))
def main():
trie = Trie()
# Insert in trie
for word in ['the', 'a', 'there', 'answer', 'any', 'by', 'their']:
trie.insert(word)
# Search
print('\nSearching words...')
print(trie.search('by')) # True
print(trie.search('ans')) # False
print(trie.search('the')) # True
print(trie.search('there')) # True
print(trie.search('abs')) # False
print(trie.search('a')) # True
# Delete
print('\nDeleting words...')
trie.delete('Hello') # Deletion failed, Hello not found
trie.delete('by')
print(trie.search('by')) # False
trie.delete('by') # Deletion failed, 'by' not found
if __name__ == '__main__':
main()