Skip to content

Commit 2382f6c

Browse files
committed
Back out "Get rid of first_strobe_is_main (it’s always true)"
This backs out commit 7f5ac33.
1 parent 430294a commit 2382f6c

6 files changed

+48
-45
lines changed

src/index.cpp

+2 -1
Original file line number | Diff line number | Diff line change
@@ -309,7 +309,8 @@ void StrobemerIndex::assign_randstrobes(size_t ref_index, size_t offset) {
309309
randstrobe.hash,
310310
randstrobe.strobe1_pos,
311311
static_cast<uint32_t>(ref_index),
312-
static_cast<uint8_t>(randstrobe.strobe2_pos - randstrobe.strobe1_pos)
312+
static_cast<uint8_t>(randstrobe.strobe2_pos - randstrobe.strobe1_pos),
313+
randstrobe.first_strobe_is_main
313314
};
314315
}
315316
chunk.clear();

src/index.hpp

+9 -2
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,7 @@ struct StrobemerIndex {
100100

101101
auto pos = std::lower_bound(randstrobes.begin() + position_start,
102102
randstrobes.begin() + position_end,
103-
RefRandstrobe{key, 0, 0, 0},
103+
RefRandstrobe{key, 0, 0, 0, 0},
104104
cmp);
105105
if ((pos->hash() & hash_mask) == masked_key) return pos - randstrobes.begin();
106106
return end();
@@ -122,6 +122,10 @@ struct StrobemerIndex {
122122
}
123123
}
124124

125+
bool first_strobe_is_main(bucket_index_t position) const {
126+
return randstrobes[position].first_strobe_is_main();
127+
}
128+
125129
bool is_filtered(bucket_index_t position) const {
126130
return get_hash(position) == get_hash(position + filter_cutoff);
127131
}
@@ -141,6 +145,9 @@ struct StrobemerIndex {
141145
std::pair<int, int> strobe_extent_partial(bucket_index_t position) const {
142146
// Construct the match from the strobe that was selected as the main part of the hash
143147
int ref_start = get_strobe1_position(position);
148+
if (!first_strobe_is_main(position)) {
149+
ref_start += strobe2_offset(position);
150+
}
144151
return {ref_start, ref_start + k()};
145152
}
146153

@@ -198,7 +205,7 @@ struct StrobemerIndex {
198205

199206
auto pos = std::upper_bound(randstrobes.begin() + position,
200207
randstrobes.begin() + position_end,
201-
RefRandstrobe{key, 0, 0, 0},
208+
RefRandstrobe{key, 0, 0, 0, 0},
202209
cmp);
203210
return (pos - randstrobes.begin() - 1) - position + 1;
204211
}

src/randstrobes.cpp

+6 -4
Original file line number | Diff line number | Diff line change
@@ -141,7 +141,7 @@ std::vector<Syncmer> canonical_syncmers(
141141

142142
std::ostream& operator<<(std::ostream& os, const Randstrobe& randstrobe) {
143143
os << "Randstrobe(hash=" << randstrobe.hash << ", strobe1_pos=" << randstrobe.strobe1_pos << ", strobe2_pos="
144-
<< randstrobe.strobe2_pos << ")";
144+
<< randstrobe.strobe2_pos << ", first_strobe_is_main=" << randstrobe.first_strobe_is_main << ")";
145145
return os;
146146
}
147147

@@ -159,7 +159,8 @@ Randstrobe make_randstrobe(Syncmer strobe1, Syncmer strobe2, randstrobe_hash_t m
159159
return Randstrobe{
160160
randstrobe_hash(strobe1.hash, strobe2.hash, main_hash_mask),
161161
static_cast<uint32_t>(strobe1.position),
162-
static_cast<uint32_t>(strobe2.position)
162+
static_cast<uint32_t>(strobe2.position),
163+
true
163164
};
164165
}
165166

@@ -239,7 +240,7 @@ QueryRandstrobeVector randstrobes_query(const std::string_view seq, const IndexP
239240
RandstrobeIterator randstrobe_fwd_iter{syncmers, parameters.randstrobe};
240241
while (randstrobe_fwd_iter.has_next()) {
241242
auto randstrobe = randstrobe_fwd_iter.next();
242-
const unsigned int partial_start = randstrobe.strobe1_pos;
243+
const unsigned int partial_start = randstrobe.first_strobe_is_main ? randstrobe.strobe1_pos : randstrobe.strobe2_pos;
243244
randstrobes.push_back(
244245
QueryRandstrobe {
245246
randstrobe.hash, randstrobe.strobe1_pos, randstrobe.strobe2_pos + parameters.syncmer.k,
@@ -264,7 +265,8 @@ QueryRandstrobeVector randstrobes_query(const std::string_view seq, const IndexP
264265
RandstrobeIterator randstrobe_rc_iter{syncmers, parameters.randstrobe};
265266
while (randstrobe_rc_iter.has_next()) {
266267
auto randstrobe = randstrobe_rc_iter.next();
267-
const unsigned int partial_start = randstrobe.strobe1_pos;
268+
bool first_strobe_is_main = randstrobe.first_strobe_is_main;
269+
const unsigned int partial_start = first_strobe_is_main ? randstrobe.strobe1_pos : randstrobe.strobe2_pos;
268270
randstrobes.push_back(
269271
QueryRandstrobe {
270272
randstrobe.hash, randstrobe.strobe1_pos, randstrobe.strobe2_pos + parameters.syncmer.k,

src/randstrobes.hpp

+11 -6
Original file line number | Diff line number | Diff line change
@@ -17,20 +17,20 @@
1717
using syncmer_hash_t = uint64_t;
1818
using randstrobe_hash_t = uint64_t;
1919

20-
static constexpr uint64_t RANDSTROBE_HASH_MASK = 0xFFFFFFFFFFFFFF00;
20+
static constexpr uint64_t RANDSTROBE_HASH_MASK = 0xFFFFFFFFFFFFFE00;
2121

2222
struct RefRandstrobe {
2323
private:
24-
// packed representation of hash and offset
24+
// packed representation of hash, offset and first_strobe_is_main
2525
randstrobe_hash_t m_hash_offset_flag;
2626
uint32_t m_position;
2727
uint32_t m_ref_index;
2828

2929
public:
3030
RefRandstrobe() : m_hash_offset_flag(0), m_position(0), m_ref_index(0) { }
3131

32-
RefRandstrobe(randstrobe_hash_t hash, uint32_t position, uint32_t ref_index, uint8_t offset)
33-
: m_hash_offset_flag((hash & RANDSTROBE_HASH_MASK) | offset)
32+
RefRandstrobe(randstrobe_hash_t hash, uint32_t position, uint32_t ref_index, uint8_t offset, bool first_strobe_is_main)
33+
: m_hash_offset_flag((hash & RANDSTROBE_HASH_MASK) | (offset << 1) | first_strobe_is_main)
3434
, m_position(position)
3535
, m_ref_index(ref_index)
3636
{ }
@@ -45,12 +45,16 @@ struct RefRandstrobe {
4545
return lhs < rhs;
4646
}
4747

48+
bool first_strobe_is_main() const {
49+
return m_hash_offset_flag & 1;
50+
}
51+
4852
unsigned reference_index() const {
4953
return m_ref_index;
5054
}
5155

5256
unsigned strobe2_offset() const {
53-
return m_hash_offset_flag & 0xff;
57+
return (m_hash_offset_flag >> 1) & 0xff;
5458
}
5559

5660
randstrobe_hash_t hash() const {
@@ -86,6 +90,7 @@ struct Randstrobe {
8690
randstrobe_hash_t hash;
8791
unsigned int strobe1_pos;
8892
unsigned int strobe2_pos;
93+
bool first_strobe_is_main;
8994

9095
bool operator==(const Randstrobe& other) const {
9196
return hash == other.hash && strobe1_pos == other.strobe1_pos && strobe2_pos == other.strobe2_pos;
@@ -183,7 +188,7 @@ class RandstrobeGenerator {
183188
{ }
184189

185190
Randstrobe next();
186-
Randstrobe end() const { return Randstrobe{0, 0, 0}; }
191+
Randstrobe end() const { return Randstrobe{0, 0, 0, false}; }
187192

188193
private:
189194
SyncmerIterator syncmer_iterator;

tests/phix.mcs.se.paf

+11 -11
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,11 @@
1-
SRR1377138.32 301 2 293 + NC_001422.1 5386 1434 1725 49 291 255
2-
SRR1377138.33 301 2 297 + NC_001422.1 5386 3818 4113 51 295 255
3-
SRR1377138.34 301 33 299 - NC_001422.1 5386 844 1110 43 266 255
4-
SRR1377138.35 301 5 298 - NC_001422.1 5386 4041 4334 49 293 255
5-
SRR1377138.36 301 3 301 + NC_001422.1 5386 4997 5295 53 298 255
6-
SRR1377138.37 301 2 299 - NC_001422.1 5386 794 1091 43 297 255
7-
SRR1377138.38 301 32 284 - NC_001422.1 5386 4971 5223 44 252 255
8-
SRR1377138.39/1 301 1 295 - NC_001422.1 5386 1791 2085 50 294 255
9-
SRR1377138.40 301 4 293 - NC_001422.1 5386 3020 3309 48 289 255
10-
rescuable.42 301 4 293 - NC_001422.1 5386 3020 3309 48 289 255
11-
not.rescuable 301 4 293 - NC_001422.1 5386 3020 3309 48 289 255
1+
SRR1377138.32 301 2 293 + NC_001422.1 5386 1434 1725 47 291 255
2+
SRR1377138.33 301 2 297 + NC_001422.1 5386 3818 4113 47 295 255
3+
SRR1377138.34 301 33 299 - NC_001422.1 5386 844 1110 40 266 255
4+
SRR1377138.35 301 5 298 - NC_001422.1 5386 4041 4334 47 293 255
5+
SRR1377138.36 301 3 301 + NC_001422.1 5386 4997 5295 51 298 255
6+
SRR1377138.37 301 2 299 - NC_001422.1 5386 794 1091 42 297 255
7+
SRR1377138.38 301 32 284 - NC_001422.1 5386 4971 5223 42 252 255
8+
SRR1377138.39/1 301 1 295 - NC_001422.1 5386 1791 2085 48 294 255
9+
SRR1377138.40 301 4 293 - NC_001422.1 5386 3020 3309 44 289 255
10+
rescuable.42 301 4 293 - NC_001422.1 5386 3020 3309 44 289 255
11+
not.rescuable 301 4 293 - NC_001422.1 5386 3020 3309 44 289 255

tests/test_randstrobes.cpp

+9 -21
Original file line number | Diff line number | Diff line change
@@ -39,27 +39,15 @@ TEST_CASE("RefRandstrobe constructor") {
3939
randstrobe_hash_t hash = 0x1234567890ABCDEF & RANDSTROBE_HASH_MASK;
4040
uint32_t position = ~0u;
4141
uint32_t ref_index = RefRandstrobe::max_number_of_references - 1;
42-
SUBCASE("one") {
43-
uint8_t offset = 255;
44-
RefRandstrobe rr{hash, position, ref_index, offset};
45-
46-
CHECK(rr.hash() == hash);
47-
CHECK(rr.position() == position);
48-
CHECK(rr.reference_index() == ref_index);
49-
CHECK(rr.strobe2_offset() == offset);
50-
}
51-
52-
SUBCASE("two") {
53-
uint8_t offset = 0;
54-
RefRandstrobe rr{hash, position, ref_index, offset};
55-
56-
CHECK(rr.hash() == hash);
57-
CHECK(rr.position() == position);
58-
CHECK(rr.reference_index() == ref_index);
59-
CHECK(rr.strobe2_offset() == offset);
60-
}
61-
62-
42+
uint8_t offset = 255;
43+
bool first_strobe_is_main = true;
44+
RefRandstrobe rr{hash, position, ref_index, offset, first_strobe_is_main};
45+
46+
CHECK(rr.hash() == hash);
47+
CHECK(rr.position() == position);
48+
CHECK(rr.reference_index() == ref_index);
49+
CHECK(rr.strobe2_offset() == offset);
50+
CHECK(rr.first_strobe_is_main() == first_strobe_is_main);
6351
}
6452

6553
TEST_CASE("SyncmerIterator") {

0 commit comments

Comments
 (0)