-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathfind-all-anagrams-in-a-string.py
146 lines (137 loc) · 5.02 KB
/
find-all-anagrams-in-a-string.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# 438. Find All Anagrams in a String
# 🟠 Medium
#
# https://leetcode.com/problems/find-all-anagrams-in-a-string/
#
# Tags: Hash Table - String - Sliding Window
import timeit
from collections import defaultdict
from typing import List
# A very similar problem to LeetCode 567. Permutation in String, create
# a hashmap of character frequencies in p, then start iterating over a
# window of s of the same length as p using the hashmap to check if the
# sliding window contains exactly the same characters, and in the same
# frequency as p, to slide the window, we add the character to the right
# and remove the one to the left from the frequencies dictionary.
#
# Time complexity: O(n) - Where n is the number of characters in s, we
# iterate over all characters in s and do O(1) work for each.
# Space complexity: O(1) - The input strings can only have English
# lowercase letters, the frequency dictionary can grow at max to 26.
#
# Runtime: 98 ms Beats 95.92%
# Memory Usage: 15.2 MB Beats 33.28%
class SlidingWindow:
def findAnagrams(self, s: str, p: str) -> List[int]:
# Base cases
if len(p) > len(s):
return []
# Dictionary with character frequencies that we need for a match.
# Positive entries denote too many of a character, negative ones
# too few.
freq = defaultdict(int)
for c in p:
freq[c] -= 1
# Add the matches of the first substring of size p.
for i in range(len(p)):
key = s[i]
# We need to match one less of this character.
freq[key] += 1
if freq[key] == 0:
del freq[key]
# Store the indexes at which an anagram starts.
result = []
# Base case where the first sequence matches.
if not freq:
result.append(0)
# Use an explicit left pointer and a for loop for an implicit
# right pointer.
left = 0
for right in range(len(p), len(s)):
# Remove one occurrence of the left character from the
# frequencies map.
key = s[left]
freq[key] -= 1
if freq[key] == 0:
del freq[key]
left += 1
# Add an occurrence of the right character to the
# frequencies map.
key = s[right]
freq[key] += 1
if freq[key] == 0:
del freq[key]
# When the frequencies map shows that we have a match,
# add it to the result set.
if not freq:
result.append(left)
return result
# An optimization of the previous solution that uses arrays to store
# the frequencies of elements.
#
# Time complexity: O(n) - Where n is the number of characters in s.
# Space complexity: O(1) - We use an array of size 26.
#
# Runtime 102 ms Beats 93.63%
# Memory 15.2 MB Beats 74.41%
class UseArray:
def findAnagrams(self, s: str, p: str) -> List[int]:
# Base case.
if len(p) > len(s):
return []
# Array of frequencies. Initialized to the missing frequencies
# in p.
freq, base = [0] * 26, ord("a")
for i in range(len(p)):
freq[ord(p[i]) - base] -= 1
freq[ord(s[i]) - base] += 1
# Store the indexes at which an anagram starts.
result = [0] if not any(freq) else []
# Use an explicit left pointer and a for loop for an implicit
# right pointer.
left = 0
for right in range(len(p), len(s)):
# Remove one occurrence of the left character from the
# frequencies map.
freq[ord(s[left]) - base] -= 1
left += 1
# Add an occurrence of the right character to the
# frequencies map.
freq[ord(s[right]) - base] += 1
# When the frequencies map shows that we have a match,
# add it to the result set.
if not any(freq):
result.append(left)
return result
def test():
executors = [
SlidingWindow,
UseArray,
]
tests = [
["a", "a", [0]],
["baa", "aa", [1]],
["abab", "ab", [0, 1, 2]],
["cbaebabacd", "abc", [0, 6]],
["aaaaaaaaaa", "aaaaaaaaac", []],
["abacbabc", "abc", [1, 2, 3, 5]],
["aaaaaaaaaa", "aaaaaaaaaa", [0]],
["aaaaaaaaaa", "aaaaaaaaaaaaa", []],
]
for executor in executors:
start = timeit.default_timer()
for _ in range(int(float("1"))):
for col, t in enumerate(tests):
sol = executor()
result = sol.findAnagrams(t[0], t[1])
exp = t[2]
assert result == exp, (
f"\033[93m» {result} <> {exp}\033[91m for"
+ f" test {col} using \033[1m{executor.__name__}"
)
stop = timeit.default_timer()
used = str(round(stop - start, 5))
cols = "{0:20}{1:10}{2:10}"
res = cols.format(executor.__name__, used, "seconds")
print(f"\033[92m» {res}\033[0m")
test()