Skip to content

Commit 4ced581

Browse files
committed
filterbit
1 parent f5122c6 commit 4ced581

File tree

4 files changed

+37
-6
lines changed

4 files changed

+37
-6
lines changed

src/index.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,16 @@ void StrobemerIndex::populate(float f, unsigned n_threads) {
244244
partial_filter_cutoff = filter_cutoff;
245245
stats.elapsed_hash_index = hash_index_timer.duration();
246246
stats.distinct_strobemers = unique_mers;
247+
248+
// TODO
249+
if (true) { //(filter_cutoff >= randstrobes.size()) {
250+
// TODO do not work on last filter_cutoff randstrobes
251+
for (size_t i = 0; i < randstrobes.size(); ++i) {
252+
if (get_hash(i) == get_hash(i + filter_cutoff)) {
253+
randstrobes[i].set_filtered();
254+
}
255+
}
256+
}
247257
}
248258

249259
void StrobemerIndex::assign_all_randstrobes(const std::vector<uint64_t>& randstrobe_counts, size_t n_threads) {
@@ -310,7 +320,8 @@ void StrobemerIndex::assign_randstrobes(size_t ref_index, size_t offset) {
310320
randstrobe.strobe1_pos,
311321
static_cast<uint32_t>(ref_index),
312322
static_cast<uint8_t>(randstrobe.strobe2_pos - randstrobe.strobe1_pos),
313-
randstrobe.first_strobe_is_main
323+
randstrobe.first_strobe_is_main,
324+
0
314325
};
315326
}
316327
chunk.clear();

src/index.hpp

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ struct StrobemerIndex {
100100

101101
auto pos = std::lower_bound(randstrobes.begin() + position_start,
102102
randstrobes.begin() + position_end,
103-
RefRandstrobe{key, 0, 0, 0, 0},
103+
RefRandstrobe{key, 0, 0, 0, 0, 0},
104104
cmp);
105105
if ((pos->hash() & hash_mask) == masked_key) return pos - randstrobes.begin();
106106
return end();
@@ -128,10 +128,21 @@ struct StrobemerIndex {
128128

129129
// Reports whether the entry at `position` counts as filtered.
// The value actually returned is the comparison of get_hash(position)
// with get_hash(position + filter_cutoff).
// NOTE(review): the function returns on its first statement, so every
// statement after that return is unreachable in this revision.
bool is_filtered(bucket_index_t position) const {
130130
// Active result: hash equality at a distance of filter_cutoff entries.
return get_hash(position) == get_hash(position + filter_cutoff);
131+
// Unreachable: would return the per-entry filter bit stored in the randstrobe.
return randstrobes[position].is_filtered();
132+
//std::cerr << "pos: " << position << " " << randstrobes[position].is_filtered() << " " << is_partial_filtered(position) << '\n';
133+
// Unreachable: cross-check of the stored bit against is_partial_filtered();
// presumably temporary debugging code -- TODO confirm before relying on it.
if (randstrobes[position].is_filtered() != is_partial_filtered(position)) {
134+
throw std::runtime_error("bla");
135+
}
136+
return is_partial_filtered(position); //randstrobes[position].is_filtered();
131137
}
132138

133139
// Reports whether the entry at `position` counts as partially filtered.
// The value actually returned is the comparison of get_main_hash(position)
// with get_main_hash(position + partial_filter_cutoff).
// NOTE(review): the function returns on its first statement, so the
// cutoff check and the flag-based return below it are unreachable.
bool is_partial_filtered(bucket_index_t position) const {
134140
// Active result: main-hash equality at a distance of partial_filter_cutoff entries.
return get_main_hash(position) == get_main_hash(position + partial_filter_cutoff);
141+
// Unreachable: would reject configurations where the two cutoffs differ.
if (filter_cutoff != partial_filter_cutoff) {
142+
throw std::runtime_error("not equal");
143+
}
144+
// Unreachable: would return the per-entry filter bit stored in the randstrobe.
return randstrobes[position].is_filtered();
145+
//return get_main_hash(position) == get_main_hash(position + partial_filter_cutoff);
135146
}
136147

137148
unsigned int get_strobe1_position(bucket_index_t position) const {
@@ -205,7 +216,7 @@ struct StrobemerIndex {
205216

206217
auto pos = std::upper_bound(randstrobes.begin() + position,
207218
randstrobes.begin() + position_end,
208-
RefRandstrobe{key, 0, 0, 0, 0},
219+
RefRandstrobe{key, 0, 0, 0, 0, 0},
209220
cmp);
210221
return (pos - randstrobes.begin() - 1) - position + 1;
211222
}

src/randstrobes.hpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,8 @@ struct RefRandstrobe {
2929
public:
3030
RefRandstrobe() : m_hash_offset_flag(0), m_position(0), m_ref_index(0) { }
3131

32-
RefRandstrobe(randstrobe_hash_t hash, uint32_t position, uint32_t ref_index, uint8_t offset, bool first_strobe_is_main)
33-
: m_hash_offset_flag((hash & RANDSTROBE_HASH_MASK) | (offset << 2) | first_strobe_is_main)
32+
// Builds a RefRandstrobe from its components.
// `hash` is masked with RANDSTROBE_HASH_MASK and then OR-ed with three
// packed fields in m_hash_offset_flag:
//   offset << 2          -- the strobe offset, starting at bit 2
//   is_filtered << 1     -- the filter flag, bit 1 (value 2)
//   first_strobe_is_main -- bit 0
// `position` and `ref_index` are stored unchanged in m_position and m_ref_index.
// NOTE(review): assumes RANDSTROBE_HASH_MASK clears all bits used by the
// packed fields -- its definition is not visible here, TODO confirm.
RefRandstrobe(randstrobe_hash_t hash, uint32_t position, uint32_t ref_index, uint8_t offset, bool first_strobe_is_main, bool is_filtered)
33+
: m_hash_offset_flag((hash & RANDSTROBE_HASH_MASK) | (offset << 2) | (is_filtered << 1) | first_strobe_is_main)
3434
, m_position(position)
3535
, m_ref_index(ref_index)
3636
{ }
@@ -65,6 +65,14 @@ struct RefRandstrobe {
6565
return m_position;
6666
}
6767

68+
// Returns true when bit 1 (value 2) of m_hash_offset_flag is set, i.e. the
// bit the constructor fills from its is_filtered argument and that
// set_filtered() turns on.
bool is_filtered() const {
69+
return (m_hash_offset_flag & 2) != 0;
70+
}
71+
72+
// Sets bit 1 (value 2) of m_hash_offset_flag, marking this randstrobe as
// filtered; all other bits are left untouched. No way to clear the bit is
// visible in this part of the file.
void set_filtered() {
73+
m_hash_offset_flag |= 2;
74+
}
75+
6876
static constexpr size_t max_number_of_references = (1ul << 32) - 1;
6977
};
7078

tests/test_randstrobes.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,12 @@ TEST_CASE("test RefRandstrobe constructor") {
88
uint32_t ref_index = RefRandstrobe::max_number_of_references - 1;
99
uint8_t offset = 255;
1010
bool first_strobe_is_main = true;
11-
RefRandstrobe rr{hash, position, ref_index, offset, first_strobe_is_main};
11+
RefRandstrobe rr{hash, position, ref_index, offset, first_strobe_is_main, true};
1212

1313
CHECK(rr.hash() == hash);
1414
CHECK(rr.position() == position);
1515
CHECK(rr.reference_index() == ref_index);
1616
CHECK(rr.strobe2_offset() == offset);
1717
CHECK(rr.first_strobe_is_main() == first_strobe_is_main);
18+
CHECK(rr.is_filtered());
1819
}

0 commit comments

Comments
 (0)