Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/arguments.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ struct SeedingArguments {
"results with non default values.", {'s'}}
, bits{parser, "INT", "No. of top bits of hash to use as bucket indices (8-31)"
"[determined from reference size]", {'b'}}
, aux_len{parser, "INT", "No. of bits to use from secondary strobe hash [17]", {"aux-len"}}
, aux_len{parser, "INT", "No. of bits to use from secondary strobe hash [16]", {"aux-len"}}
{
}
args::ArgumentParser& parser;
Expand Down
2 changes: 1 addition & 1 deletion src/cmdline.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ struct CommandLineOptions {
int u { 7 };
int s { 16 };
int c { 8 };
int aux_len{17};
int aux_len{16};

// Alignment
int A { 2 };
Expand Down
2 changes: 1 addition & 1 deletion src/dumpstrobes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ int run_dumpstrobes(int argc, char **argv) {
}

// Seeding
int r{150}, k{20}, s{16}, c{8}, l{1}, u{7}, aux_len{17};
int r{150}, k{20}, s{16}, c{8}, l{1}, u{7}, aux_len{DEFAULT_AUXLEN};
int max_seed_len{};

bool k_set{false}, s_set{false}, c_set{false}, max_seed_len_set{false}, l_set{false}, u_set{false};
Expand Down
13 changes: 12 additions & 1 deletion src/index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,16 @@ void StrobemerIndex::populate(float f, unsigned n_threads) {
partial_filter_cutoff = filter_cutoff;
stats.elapsed_hash_index = hash_index_timer.duration();
stats.distinct_strobemers = unique_mers;

// TODO
if (true) { //(filter_cutoff >= randstrobes.size()) {
// TODO do not work on last filter_cutoff randstrobes
for (size_t i = 0; i < randstrobes.size(); ++i) {
if (get_hash(i) == get_hash(i + filter_cutoff)) {
randstrobes[i].set_filtered();
}
}
}
}

void StrobemerIndex::assign_all_randstrobes(const std::vector<uint64_t>& randstrobe_counts, size_t n_threads) {
Expand Down Expand Up @@ -309,7 +319,8 @@ void StrobemerIndex::assign_randstrobes(size_t ref_index, size_t offset) {
randstrobe.hash,
randstrobe.strobe1_pos,
static_cast<uint32_t>(ref_index),
static_cast<uint8_t>(randstrobe.strobe2_pos - randstrobe.strobe1_pos)
static_cast<uint8_t>(randstrobe.strobe2_pos - randstrobe.strobe1_pos),
0
};
}
chunk.clear();
Expand Down
6 changes: 3 additions & 3 deletions src/index.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ struct StrobemerIndex {

auto pos = std::lower_bound(randstrobes.begin() + position_start,
randstrobes.begin() + position_end,
RefRandstrobe{key, 0, 0, 0},
RefRandstrobe{key, 0, 0, 0, 0},
cmp);
if ((pos->hash() & hash_mask) == masked_key) return pos - randstrobes.begin();
return end();
Expand All @@ -123,7 +123,7 @@ struct StrobemerIndex {
}

bool is_filtered(bucket_index_t position) const {
return get_hash(position) == get_hash(position + filter_cutoff);
return randstrobes[position].is_filtered();
}

bool is_partial_filtered(bucket_index_t position) const {
Expand Down Expand Up @@ -198,7 +198,7 @@ struct StrobemerIndex {

auto pos = std::upper_bound(randstrobes.begin() + position,
randstrobes.begin() + position_end,
RefRandstrobe{key, 0, 0, 0},
RefRandstrobe{key, 0, 0, 0, 0},
cmp);
return (pos - randstrobes.begin() - 1) - position + 1;
}
Expand Down
2 changes: 1 addition & 1 deletion src/indexparameters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ IndexParameters IndexParameters::from_read_length(int read_length, int k, int s,
}
int q = std::pow(2, c == DEFAULT ? default_c : c) - 1;
if (aux_len == DEFAULT) {
aux_len = 17;
aux_len = DEFAULT_AUXLEN;
}

return IndexParameters(canonical_read_length, k, s, l, u, q, max_dist, aux_len);
Expand Down
3 changes: 2 additions & 1 deletion src/indexparameters.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <limits>
#include "exceptions.hpp"

// Default value for aux_len, the number of bits taken from the secondary
// strobe hash (user-overridable through the --aux-len option).
static constexpr int DEFAULT_AUXLEN{16};

struct SyncmerParameters {
const int k;
Expand Down Expand Up @@ -80,7 +81,7 @@ class IndexParameters {
IndexParameters(size_t canonical_read_length, int k, int s, int l, int u, uint64_t q, int max_dist, int aux_len)
: canonical_read_length(canonical_read_length)
, syncmer(k, s)
, randstrobe(q, max_dist, std::max(0, k / (k - s + 1) + l), k / (k - s + 1) + u, ~0ul << (9 + aux_len))
, randstrobe(q, max_dist, std::max(0, k / (k - s + 1) + l), k / (k - s + 1) + u, ~0ul << (10 + aux_len))
{
verify(aux_len);
}
Expand Down
16 changes: 12 additions & 4 deletions src/randstrobes.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
using syncmer_hash_t = uint64_t;
using randstrobe_hash_t = uint64_t;

static constexpr uint64_t RANDSTROBE_HASH_MASK = 0xFFFFFFFFFFFFFF00;
static constexpr uint64_t RANDSTROBE_HASH_MASK = 0xFFFFFFFFFFFFFE00;

struct RefRandstrobe {
private:
Expand All @@ -29,8 +29,8 @@ struct RefRandstrobe {
public:
RefRandstrobe() : m_hash_offset_flag(0), m_position(0), m_ref_index(0) { }

RefRandstrobe(randstrobe_hash_t hash, uint32_t position, uint32_t ref_index, uint8_t offset)
: m_hash_offset_flag((hash & RANDSTROBE_HASH_MASK) | offset)
RefRandstrobe(randstrobe_hash_t hash, uint32_t position, uint32_t ref_index, uint8_t offset, bool is_filtered)
: m_hash_offset_flag((hash & RANDSTROBE_HASH_MASK) | (offset << 1) | is_filtered)
, m_position(position)
, m_ref_index(ref_index)
{ }
Expand All @@ -50,7 +50,7 @@ struct RefRandstrobe {
}

unsigned strobe2_offset() const {
return m_hash_offset_flag & 0xff;
return (m_hash_offset_flag >> 1) & 0xff;
}

randstrobe_hash_t hash() const {
Expand All @@ -61,6 +61,14 @@ struct RefRandstrobe {
return m_position;
}

// Layout of the low bits of m_hash_offset_flag, as packed by the constructor
// ((hash & RANDSTROBE_HASH_MASK) | (offset << 1) | is_filtered):
//   bit 0      - "filtered" flag
//   bits 1..8  - strobe2 offset (read back by strobe2_offset() via >> 1)
// The flag accessors below must therefore use bit 0; using bit 1 would alias
// the least significant bit of the offset.

/// Return true if the "filtered" flag (bit 0) is set on this randstrobe.
bool is_filtered() const {
    return (m_hash_offset_flag & 1) != 0;
}

/// Set the "filtered" flag (bit 0) without touching the hash or offset bits.
void set_filtered() {
    m_hash_offset_flag |= 1;
}

static constexpr size_t max_number_of_references = (1ul << 32) - 1;
};

Expand Down
2 changes: 1 addition & 1 deletion tests/baseline-commit.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
acc4cffe5ac2c4db266c58d00b7b6462c6b4189c
4306810e64d87e52988673e6eea5356007bd3817
2 changes: 1 addition & 1 deletion tests/test_indexparameters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ TEST_CASE("test IndexParameters constructor") {
int max_dist = 180;
uint64_t q = 255;
uint64_t main_hash_mask = 0xfffffffffc000000;
int aux_len = 17;
int aux_len = 16;

SyncmerParameters sp{k, s};
RandstrobeParameters rp{q, max_dist, w_min, w_max, main_hash_mask};
Expand Down
9 changes: 5 additions & 4 deletions tests/test_randstrobes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,27 +39,28 @@ TEST_CASE("RefRandstrobe constructor") {
randstrobe_hash_t hash = 0x1234567890ABCDEF & RANDSTROBE_HASH_MASK;
uint32_t position = ~0u;
uint32_t ref_index = RefRandstrobe::max_number_of_references - 1;

SUBCASE("one") {
uint8_t offset = 255;
RefRandstrobe rr{hash, position, ref_index, offset};
RefRandstrobe rr{hash, position, ref_index, offset, true};

CHECK(rr.hash() == hash);
CHECK(rr.position() == position);
CHECK(rr.reference_index() == ref_index);
CHECK(rr.strobe2_offset() == offset);
CHECK(rr.is_filtered());
}

SUBCASE("two") {
uint8_t offset = 0;
RefRandstrobe rr{hash, position, ref_index, offset};
RefRandstrobe rr{hash, position, ref_index, offset, false};

CHECK(rr.hash() == hash);
CHECK(rr.position() == position);
CHECK(rr.reference_index() == ref_index);
CHECK(rr.strobe2_offset() == offset);
CHECK(!rr.is_filtered());
}


}

TEST_CASE("SyncmerIterator") {
Expand Down
Loading