@@ -59,25 +59,23 @@ struct StrobemerIndex {
59
59
60
60
// Find first entry that matches the given key
// Delegates to find(). In this diff the second argument changes from a shift
// count of 0 to the mask RANDSTROBE_HASH_MASK, which find() applies to both
// the key and each entry's hash before comparing them.
// Returns whatever find() returns for that mask.
// NOTE(review): RANDSTROBE_HASH_MASK is defined outside this view; presumably
// it keeps every bit that hash() can produce - confirm at its definition.
61
61
size_t find_full (randstrobe_hash_t key) const {
62
- return find (key, 0 );
62
+ return find (key, RANDSTROBE_HASH_MASK );
63
63
}
64
64
65
65
/*
66
66
* Find the first entry that matches the main hash (ignoring the aux_len
67
67
* least significant bits)
*
* Delegates to find(). In this diff the second argument changes from the
* shift count parameters.randstrobe.aux_len to the mask
* parameters.randstrobe.main_hash_mask.
* Returns whatever find() returns for that mask.
* NOTE(review): main_hash_mask is defined outside this view; presumably it
* clears exactly the aux_len low bits - confirm where it is computed.
68
68
*/
69
69
size_t find_partial (randstrobe_hash_t key) const {
70
- return find (key, parameters.randstrobe .aux_len );
70
+ return find (key, parameters.randstrobe .main_hash_mask );
71
71
}
72
72
73
73
/*
74
- * Find first entry whose hash matches the given key, but ignore the
75
- * b least significant bits
74
+ * Find first entry whose hash matches the given key. Mask both key and
75
+ * entry by hash_mask.
*
* Parameters: key is the hash to look up; hash_mask selects the bits that
* take part in every comparison (the removed version took a shift count b
* and compared hash >> b instead).
* Returns the index into randstrobes of the first matching entry, or end()
* when no entry matches.
* NOTE(review): this rendering skips the lines between its two hunks, so
* the definition of position_end is not visible here.
76
76
*/
77
- size_t find (randstrobe_hash_t key, uint8_t b) const {
78
- const unsigned int aux_len = b;
79
- randstrobe_hash_t key_prefix = key >> aux_len;
80
-
77
+ size_t find (randstrobe_hash_t key, uint64_t hash_mask) const {
78
+ randstrobe_hash_t masked_key = key & hash_mask;
81
79
constexpr int MAX_LINEAR_SEARCH = 4 ;
82
80
// Bucket lookup: key >> (64 - bits) indexes randstrobe_start_indices, i.e.
// the top 'bits' bits of key assuming a 64-bit hash type - TODO confirm.
const unsigned int top_N = key >> (64 - bits);
83
81
bucket_index_t position_start = randstrobe_start_indices[top_N];
@@ -88,19 +86,20 @@ struct StrobemerIndex {
88
86
89
87
// Ranges shorter than MAX_LINEAR_SEARCH entries are scanned one by one.
if (position_end - position_start < MAX_LINEAR_SEARCH) {
90
88
for ( ; position_start < position_end; ++position_start) {
// First test: match found. Second test: give up early once an entry's
// masked hash exceeds the masked key. This assumes the entries in the
// range are ordered by masked hash - presumably guaranteed by how the
// index is sorted; confirm.
91
- if (randstrobes[position_start].hash () >> aux_len == key_prefix ) return position_start;
92
- if (randstrobes[position_start].hash () >> aux_len > key_prefix ) return end ();
89
+ if (( randstrobes[position_start].hash () & hash_mask) == masked_key ) return position_start;
90
+ if (( randstrobes[position_start].hash () & hash_mask) > masked_key ) return end ();
93
91
}
94
92
return end ();
95
93
}
// Longer ranges: binary search using the same masked comparison.
96
- auto cmp = [&aux_len](const RefRandstrobe lhs, const RefRandstrobe rhs) {
97
- return (lhs.hash () >> aux_len) < (rhs.hash () >> aux_len); };
94
+ auto cmp = [&hash_mask](const RefRandstrobe lhs, const RefRandstrobe rhs) {
95
+ return (lhs.hash () & hash_mask) < (rhs.hash () & hash_mask);
96
+ };
98
97
99
98
auto pos = std::lower_bound (randstrobes.begin () + position_start,
100
99
randstrobes.begin () + position_end,
101
100
RefRandstrobe{key, 0 , 0 , 0 , 0 },
102
101
cmp);
// NOTE(review): std::lower_bound returns the end of the searched range when
// every entry compares less than the key, and pos is dereferenced below
// without checking for that. Confirm that randstrobes always has a valid
// element at position_end (e.g. a sentinel entry), or add a bounds check.
103
- if (pos->hash () >> aux_len == key_prefix ) return pos - randstrobes.begin ();
102
+ if (( pos->hash () & hash_mask) == masked_key ) return pos - randstrobes.begin ();
104
103
return end ();
105
104
}
106
105
@@ -114,7 +113,7 @@ struct StrobemerIndex {
114
113
115
114
randstrobe_hash_t get_main_hash (bucket_index_t position) const {
116
115
if (position < randstrobes.size ()) {
117
- return randstrobes[position].hash () >> parameters.randstrobe .aux_len ;
116
+ return randstrobes[position].hash () & parameters.randstrobe .main_hash_mask ;
118
117
} else {
119
118
return end ();
120
119
}
@@ -129,8 +128,7 @@ struct StrobemerIndex {
129
128
}
130
129
131
130
// Returns true when the entry at 'position' and the entry
// partial_filter_cutoff places after it have equal main hashes.
// The removed version shifted both get_hash() results right by
// parameters.randstrobe.aux_len; the added version compares get_main_hash()
// results instead.
// NOTE(review): get_main_hash() in this file returns end() for a position
// >= randstrobes.size(), so an out-of-range second position now compares
// end() against a masked hash. How the removed get_hash() calls treated that
// case is not visible here - confirm the behaviour is still as intended.
bool is_partial_filtered (bucket_index_t position) const {
132
- const unsigned int shift = parameters.randstrobe .aux_len ;
133
- return (get_hash (position) >> shift) == (get_hash (position + partial_filter_cutoff) >> shift);
131
+ return get_main_hash (position) == get_main_hash (position + partial_filter_cutoff);
134
132
}
135
133
136
134
unsigned int get_strobe1_position (bucket_index_t position) const {
@@ -163,14 +161,14 @@ struct StrobemerIndex {
163
161
}
164
162
165
163
// Number of entries, starting at 'position', that get_count() reports as
// sharing the hash of the entry at 'position'.
// In this diff the second argument changes from a shift count of 0 to the
// mask RANDSTROBE_HASH_MASK.
// NOTE(review): RANDSTROBE_HASH_MASK is defined outside this view; presumably
// it keeps every hash bit so the whole hash is compared - confirm.
unsigned int get_count_full (bucket_index_t position) const {
166
- return get_count (position, 0 );
164
+ return get_count (position, RANDSTROBE_HASH_MASK );
167
165
}
168
166
169
167
// Same as get_count_full(), but the comparison inside get_count() uses
// parameters.randstrobe.main_hash_mask. In this diff that mask replaces the
// shift count parameters.randstrobe.aux_len.
// NOTE(review): presumably main_hash_mask clears the aux_len low bits so only
// the main hash is compared - confirm where the mask is computed.
unsigned int get_count_partial (bucket_index_t position) const {
170
- return get_count (position, parameters.randstrobe .aux_len );
168
+ return get_count (position, parameters.randstrobe .main_hash_mask );
171
169
}
172
170
173
- unsigned int get_count (bucket_index_t position, uint8_t b ) const {
171
+ unsigned int get_count (bucket_index_t position, uint64_t hash_mask ) const {
174
172
// For 95% of cases, the result will be small and a brute force search
175
173
// is the best option. Once, we go over MAX_LINEAR_SEARCH, though, we
176
174
// use a binary search to get the next position
@@ -182,27 +180,25 @@ struct StrobemerIndex {
182
180
// seed with the given hash to yield the number of seeds with this hash.
183
181
184
182
constexpr unsigned int MAX_LINEAR_SEARCH = 8 ;
185
- const unsigned int aux_len = b;
186
183
187
184
const auto key = randstrobes[position].hash ();
188
- randstrobe_hash_t key_prefix = key >> aux_len ;
185
+ randstrobe_hash_t masked_key = key & hash_mask ;
189
186
190
187
const unsigned int top_N = key >> (64 - bits);
191
188
bucket_index_t position_end = randstrobe_start_indices[top_N + 1 ];
192
189
uint64_t count = 1 ;
193
190
194
191
if (position_end - position < MAX_LINEAR_SEARCH) {
195
192
for (bucket_index_t position_start = position + 1 ; position_start < position_end; ++position_start) {
196
- if (randstrobes[position_start].hash () >> aux_len == key_prefix) {
193
+ if (( randstrobes[position_start].hash () & hash_mask) == masked_key) {
197
194
count += 1 ;
198
- }
199
- else {
195
+ } else {
200
196
break ;
201
197
}
202
198
}
203
199
return count;
204
200
}
205
- auto cmp = [&aux_len ](const RefRandstrobe lhs, const RefRandstrobe rhs) {return (lhs.hash () >> aux_len ) < (rhs.hash () >> aux_len ); };
201
+ auto cmp = [&hash_mask ](const RefRandstrobe lhs, const RefRandstrobe rhs) {return (lhs.hash () & hash_mask ) < (rhs.hash () & hash_mask ); };
206
202
207
203
auto pos = std::upper_bound (randstrobes.begin () + position,
208
204
randstrobes.begin () + position_end,
@@ -223,8 +219,8 @@ struct StrobemerIndex {
223
219
return bits;
224
220
}
225
221
226
// Accessor for parameters.randstrobe.main_hash_mask (returned as uint64_t).
// This diff removes the former accessor get_aux_len(), which returned
// parameters.randstrobe.aux_len as int, so any remaining callers of
// get_aux_len() need to be migrated to the mask-based interface.
- int get_aux_len () const {
227
- return parameters.randstrobe .aux_len ;
222
+ uint64_t get_main_hash_mask () const {
223
+ return parameters.randstrobe .main_hash_mask ;
228
224
}
229
225
230
226
private:
0 commit comments