Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/arguments.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,11 @@ struct SeedingArguments {
"results with non default values.", {'s'}}
, bits{parser, "INT", "No. of top bits of hash to use as bucket indices (8-31)"
"[determined from reference size]", {'b'}}
, aux_len{parser, "INT", "No. of bits to use from secondary strobe hash [24]", {"aux-len"}}
{
}
args::ArgumentParser& parser;
args::ValueFlag<int> r, m, k, l, u, c, s, bits;
args::ValueFlag<int> r, m, k, l, u, c, s, bits, aux_len;
};

#endif
1 change: 1 addition & 0 deletions src/cmdline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ CommandLineOptions parse_command_line_arguments(int argc, char **argv) {
if (seeding.s) { opt.s = args::get(seeding.s); opt.s_set = true; }
if (seeding.c) { opt.c = args::get(seeding.c); opt.c_set = true; }
if (seeding.bits) { opt.bits = args::get(seeding.bits); }
if (seeding.aux_len) { opt.aux_len = args::get(seeding.aux_len); }

// Alignment
// if (n) { n = args::get(n); }
Expand Down
1 change: 1 addition & 0 deletions src/cmdline.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ struct CommandLineOptions {
int u { 7 };
int s { 16 };
int c { 8 };
int aux_len{24};

// Alignment
int A { 2 };
Expand Down
5 changes: 3 additions & 2 deletions src/dumpstrobes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ int run_dumpstrobes(int argc, char **argv) {
}

// Seeding
int r{150}, k{20}, s{16}, c{8}, l{1}, u{7};
int r{150}, k{20}, s{16}, c{8}, l{1}, u{7}, aux_len{24};
int max_seed_len{};

bool k_set{false}, s_set{false}, c_set{false}, max_seed_len_set{false}, l_set{false}, u_set{false};
Expand All @@ -125,7 +125,8 @@ int run_dumpstrobes(int argc, char **argv) {
l_set ? l : IndexParameters::DEFAULT,
u_set ? u : IndexParameters::DEFAULT,
c_set ? c : IndexParameters::DEFAULT,
max_seed_len_set ? max_seed_len : IndexParameters::DEFAULT
max_seed_len_set ? max_seed_len : IndexParameters::DEFAULT,
aux_len ? aux_len : IndexParameters::DEFAULT
);

logger.info() << index_parameters << '\n';
Expand Down
4 changes: 2 additions & 2 deletions src/index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
#include <sstream>

static Logger& logger = Logger::get();
static const uint32_t STI_FILE_FORMAT_VERSION = 2;
static const uint32_t STI_FILE_FORMAT_VERSION = 3;


namespace {
Expand Down Expand Up @@ -304,7 +304,7 @@ void StrobemerIndex::assign_randstrobes(size_t ref_index, size_t offset) {
chunk.push_back(randstrobe);
}
for (auto randstrobe : chunk) {
RefRandstrobe::packed_t packed = ref_index << 8;
RefRandstrobe::packed_t packed = (ref_index << 9) | (randstrobe.main_is_first << 8);
packed = packed + (randstrobe.strobe2_pos - randstrobe.strobe1_pos);
randstrobes[offset++] = RefRandstrobe{randstrobe.hash, randstrobe.strobe1_pos, packed};
}
Expand Down
14 changes: 10 additions & 4 deletions src/indexparameters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ bool RandstrobeParameters::operator==(const RandstrobeParameters& other) const {
&& this->q == other.q
&& this->max_dist == other.max_dist
&& this->w_min == other.w_min
&& this->w_max == other.w_max;
&& this->w_max == other.w_max
&& this->aux_len == other.aux_len;
}

/* Pre-defined index parameters that work well for a certain
Expand Down Expand Up @@ -48,7 +49,7 @@ static std::vector<Profile> profiles = {
* k, s, l, u, c and max_seed_len can be used to override determined parameters
* by setting them to a value other than IndexParameters::DEFAULT.
*/
IndexParameters IndexParameters::from_read_length(int read_length, int k, int s, int l, int u, int c, int max_seed_len) {
IndexParameters IndexParameters::from_read_length(int read_length, int k, int s, int l, int u, int c, int max_seed_len, int aux_len) {
const int default_c = 8;
size_t canonical_read_length = 50;
for (const auto& p : profiles) {
Expand Down Expand Up @@ -78,8 +79,11 @@ IndexParameters IndexParameters::from_read_length(int read_length, int k, int s,
max_dist = max_seed_len - k; // convert to distance in start positions
}
int q = std::pow(2, c == DEFAULT ? default_c : c) - 1;
if (aux_len == DEFAULT) {
aux_len = 24;
}

return IndexParameters(canonical_read_length, k, s, l, u, q, max_dist);
return IndexParameters(canonical_read_length, k, s, l, u, q, max_dist, aux_len);
}

void IndexParameters::write(std::ostream& os) const {
Expand All @@ -90,6 +94,7 @@ void IndexParameters::write(std::ostream& os) const {
write_int_to_ostream(os, randstrobe.u);
write_int_to_ostream(os, randstrobe.q);
write_int_to_ostream(os, randstrobe.max_dist);
write_int_to_ostream(os, randstrobe.aux_len);
}

IndexParameters IndexParameters::read(std::istream& is) {
Expand All @@ -100,7 +105,8 @@ IndexParameters IndexParameters::read(std::istream& is) {
int u = read_int_from_istream(is);
int q = read_int_from_istream(is);
int max_dist = read_int_from_istream(is);
return IndexParameters(canonical_read_length, k, s, l, u, q, max_dist);
int aux_len = read_int_from_istream(is);
return IndexParameters(canonical_read_length, k, s, l, u, q, max_dist, aux_len);
}

bool IndexParameters::operator==(const IndexParameters& other) const {
Expand Down
14 changes: 9 additions & 5 deletions src/indexparameters.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,16 @@ struct RandstrobeParameters {
const int max_dist;
const unsigned w_min;
const unsigned w_max;
const unsigned aux_len;

RandstrobeParameters(int l, int u, uint64_t q, int max_dist, unsigned w_min, unsigned w_max)
RandstrobeParameters(int l, int u, uint64_t q, int max_dist, unsigned w_min, unsigned w_max, unsigned aux_len)
: l(l)
, u(u)
, q(q)
, max_dist(max_dist)
, w_min(w_min)
, w_max(w_max)
, aux_len(aux_len)
{
verify();
}
Expand All @@ -65,6 +67,9 @@ struct RandstrobeParameters {
if (w_min > w_max) {
throw BadParameter("w_min is greater than w_max (choose different -l/-u parameters)");
}
if (aux_len > 63) {
throw BadParameter("aux_len is larger than 63");
}
}
};

Expand All @@ -77,16 +82,15 @@ class IndexParameters {

static const int DEFAULT = std::numeric_limits<int>::min();

IndexParameters(size_t canonical_read_length, int k, int s, int l, int u, int q, int max_dist)
IndexParameters(size_t canonical_read_length, int k, int s, int l, int u, int q, int max_dist, int aux_len)
: canonical_read_length(canonical_read_length)
, syncmer(k, s)
, randstrobe(l, u, q, max_dist, std::max(0, k / (k - s + 1) + l), k / (k - s + 1) + u)
, randstrobe(l, u, q, max_dist, std::max(0, k / (k - s + 1) + l), k / (k - s + 1) + u, aux_len)
{
}

static IndexParameters from_read_length(
int read_length, int k = DEFAULT, int s = DEFAULT, int l = DEFAULT, int u = DEFAULT, int c = DEFAULT, int max_seed_len = DEFAULT
);
int read_length, int k = DEFAULT, int s = DEFAULT, int l = DEFAULT, int u = DEFAULT, int c = DEFAULT, int max_seed_len = DEFAULT, int aux_len = DEFAULT);
static IndexParameters read(std::istream& os);
std::string filename_extension() const;
void write(std::ostream& os) const;
Expand Down
4 changes: 3 additions & 1 deletion src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,8 @@ int run_strobealign(int argc, char **argv) {
opt.l_set ? opt.l : IndexParameters::DEFAULT,
opt.u_set ? opt.u : IndexParameters::DEFAULT,
opt.c_set ? opt.c : IndexParameters::DEFAULT,
opt.max_seed_len_set ? opt.max_seed_len : IndexParameters::DEFAULT
opt.max_seed_len_set ? opt.max_seed_len : IndexParameters::DEFAULT,
opt.aux_len ? opt.aux_len : IndexParameters::DEFAULT
);
logger.debug() << index_parameters << '\n';
AlignmentParameters aln_params;
Expand Down Expand Up @@ -228,6 +229,7 @@ int run_strobealign(int argc, char **argv) {
throw InvalidFasta("Too many reference sequences. Current maximum is " + std::to_string(RefRandstrobe::max_number_of_references));
}

logger.debug() << "Auxiliary hash length: " << index_parameters.randstrobe.aux_len << "\n";
StrobemerIndex index(references, index_parameters, opt.bits);
if (opt.use_index) {
// Read the index from a file
Expand Down
23 changes: 15 additions & 8 deletions src/randstrobes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,12 @@ static inline syncmer_hash_t syncmer_smer_hash(uint64_t packed) {
return xxh64(packed);
}

static inline randstrobe_hash_t randstrobe_hash(syncmer_hash_t hash1, syncmer_hash_t hash2) {
return hash1 + hash2;
static inline randstrobe_hash_t randstrobe_hash(syncmer_hash_t hash1, syncmer_hash_t hash2, size_t aux_len) {
// Make the function symmetric
if (hash1 > hash2) {
std::swap(hash1, hash2);
}
return ((hash1 >> aux_len) << aux_len) ^ (hash2 >> (64 - aux_len));
}

std::ostream& operator<<(std::ostream& os, const Syncmer& syncmer) {
Expand Down Expand Up @@ -131,7 +135,8 @@ std::vector<Syncmer> canonical_syncmers(
}

std::ostream& operator<<(std::ostream& os, const Randstrobe& randstrobe) {
os << "Randstrobe(hash=" << randstrobe.hash << ", strobe1_pos=" << randstrobe.strobe1_pos << ", strobe2_pos=" << randstrobe.strobe2_pos << ")";
os << "Randstrobe(hash=" << randstrobe.hash << ", strobe1_pos=" << randstrobe.strobe1_pos << ", strobe2_pos="
<< randstrobe.strobe2_pos << ", main_is_first=" << randstrobe.main_is_first << ")";
return os;
}

Expand All @@ -145,11 +150,13 @@ std::ostream& operator<<(std::ostream& os, const QueryRandstrobe& randstrobe) {
return os;
}

Randstrobe make_randstrobe(Syncmer strobe1, Syncmer strobe2) {
Randstrobe make_randstrobe(Syncmer strobe1, Syncmer strobe2, int aux_len) {
bool main_is_first = strobe1.hash < strobe2.hash;
return Randstrobe{
randstrobe_hash(strobe1.hash, strobe2.hash),
randstrobe_hash(strobe1.hash, strobe2.hash, aux_len),
static_cast<uint32_t>(strobe1.position),
static_cast<uint32_t>(strobe2.position)
static_cast<uint32_t>(strobe2.position),
main_is_first
};
}

Expand All @@ -175,7 +182,7 @@ Randstrobe RandstrobeIterator::get(unsigned int strobe1_index) const {
}
}

return make_randstrobe(strobe1, strobe2);
return make_randstrobe(strobe1, strobe2, aux_len);
}

Randstrobe RandstrobeGenerator::next() {
Expand Down Expand Up @@ -207,7 +214,7 @@ Randstrobe RandstrobeGenerator::next() {
}
syncmers.pop_front();

return make_randstrobe(strobe1, strobe2);
return make_randstrobe(strobe1, strobe2, aux_len);
}

/*
Expand Down
17 changes: 13 additions & 4 deletions src/randstrobes.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,12 @@ struct RefRandstrobe {
return lhs < rhs;
}

bool main_is_first() const {
return (m_packed >> bit_alloc) & 1;
}

int reference_index() const {
return m_packed >> bit_alloc;
return m_packed >> (bit_alloc + 1);
}

int strobe2_offset() const {
Expand All @@ -54,7 +58,7 @@ struct RefRandstrobe {
packed_t m_packed; // packed representation of ref_index and strobe offset

public:
static constexpr uint32_t max_number_of_references = (1 << (32 - bit_alloc)) - 1;
static constexpr uint32_t max_number_of_references = (1 << (32 - bit_alloc - 1)) - 1; // bit_alloc - 1 because 1 bit to main_is_first()
};

struct QueryRandstrobe {
Expand All @@ -74,6 +78,7 @@ struct Randstrobe {
randstrobe_hash_t hash;
unsigned int strobe1_pos;
unsigned int strobe2_pos;
bool main_is_first;

bool operator==(const Randstrobe& other) const {
return hash == other.hash && strobe1_pos == other.strobe1_pos && strobe2_pos == other.strobe2_pos;
Expand Down Expand Up @@ -107,6 +112,7 @@ class RandstrobeIterator {
, w_max(parameters.w_max)
, q(parameters.q)
, max_dist(parameters.max_dist)
, aux_len(parameters.aux_len)
{
if (w_min > w_max) {
throw std::invalid_argument("w_min is greater than w_max");
Expand All @@ -128,7 +134,8 @@ class RandstrobeIterator {
const unsigned w_max;
const uint64_t q;
const unsigned int max_dist;
unsigned int strobe1_index = 0;
const unsigned int aux_len;
unsigned strobe1_index = 0;
};

std::ostream& operator<<(std::ostream& os, const Syncmer& syncmer);
Expand Down Expand Up @@ -176,17 +183,19 @@ class RandstrobeGenerator {
, w_max(randstrobe_parameters.w_max)
, q(randstrobe_parameters.q)
, max_dist(randstrobe_parameters.max_dist)
, aux_len(randstrobe_parameters.aux_len)
{ }

Randstrobe next();
Randstrobe end() const { return Randstrobe{0, 0, 0}; }
Randstrobe end() const { return Randstrobe{0, 0, 0, false}; }

private:
SyncmerIterator syncmer_iterator;
const unsigned w_min;
const unsigned w_max;
const uint64_t q;
const unsigned int max_dist;
const unsigned int aux_len;
std::deque<Syncmer> syncmers;
};

Expand Down