Skip to content

Commit 8bc4137

Browse files
committed
[MISC] C++23 md subscript
1 parent 99b6f98 commit 8bc4137

File tree

6 files changed

+104
-59
lines changed

6 files changed

+104
-59
lines changed

include/hibf/hierarchical_interleaved_bloom_filter.hpp

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,9 @@
2121
#include <hibf/interleaved_bloom_filter.hpp> // for interleaved_bloom_filter
2222
#include <hibf/layout/layout.hpp> // for layout
2323
#include <hibf/misc/counting_vector.hpp> // for counting_vector
24-
#include <hibf/misc/timer.hpp> // for concurrent_timer
25-
#include <hibf/platform.hpp> // for HIBF_CONSTEXPR_VECTOR
24+
#include <hibf/misc/md_vector.hpp>
25+
#include <hibf/misc/timer.hpp> // for concurrent_timer
26+
#include <hibf/platform.hpp> // for HIBF_CONSTEXPR_VECTOR
2627

2728
namespace seqan::hibf
2829
{
@@ -202,7 +203,7 @@ class hierarchical_interleaved_bloom_filter
202203
* If `j != i` is returned, there is a lower level IBF, bin `b` is a merged bin, and `j` is the ID of the lower
203204
* level IBF in ibf_vector.
204205
*/
205-
std::vector<std::vector<int64_t>> next_ibf_id;
206+
md_vector<int64_t> next_ibf_id;
206207

207208
/*!\brief Stores for each bin in each IBF of the HIBF the user bin ID.
208209
* \details
@@ -211,7 +212,7 @@ class hierarchical_interleaved_bloom_filter
211212
* lower level IBF.
212213
* Otherwise, the returned value `j` is the corresponding user bin ID.
213214
*/
214-
std::vector<std::vector<int64_t>> ibf_bin_to_user_bin_id{};
215+
md_vector<int64_t> ibf_bin_to_user_bin_id{};
215216

216217
//!\brief Returns a membership_agent to be used for counting.
217218
membership_agent_type membership_agent() const;
@@ -280,16 +281,16 @@ class hierarchical_interleaved_bloom_filter::membership_agent_type
280281
{
281282
sum += result[bin];
282283

283-
auto const current_filename_index = hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx][bin];
284+
auto const current_filename_index = hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx, bin];
284285

285286
if (current_filename_index < 0) // merged bin
286287
{
287288
if (sum >= threshold)
288-
membership_for_impl(values, hibf_ptr->next_ibf_id[ibf_idx][bin], threshold);
289+
membership_for_impl(values, hibf_ptr->next_ibf_id[ibf_idx, bin], threshold);
289290
sum = 0u;
290291
}
291292
else if (bin + 1u == result.size() || // last bin
292-
current_filename_index != hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx][bin + 1]) // end of split bin
293+
current_filename_index != hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx, bin + 1]) // end of split bin
293294
{
294295
if (sum >= threshold)
295296
result_buffer.emplace_back(current_filename_index);
@@ -415,16 +416,16 @@ class hierarchical_interleaved_bloom_filter::counting_agent_type
415416
for (size_t bin{}; bin < result.size(); ++bin)
416417
{
417418
sum += result[bin];
418-
auto const current_filename_index = hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx][bin];
419+
auto const current_filename_index = hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx, bin];
419420

420421
if (current_filename_index < 0) // merged bin
421422
{
422423
if (sum >= threshold)
423-
bulk_count_impl(values, hibf_ptr->next_ibf_id[ibf_idx][bin], threshold);
424+
bulk_count_impl(values, hibf_ptr->next_ibf_id[ibf_idx, bin], threshold);
424425
sum = 0u;
425426
}
426427
else if (bin + 1u == result.size() || // last bin
427-
current_filename_index != hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx][bin + 1]) // end of split bin
428+
current_filename_index != hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx, bin + 1]) // end of split bin
428429
{
429430
if (sum >= threshold)
430431
result_buffer[current_filename_index] = sum;

include/hibf/layout/hierarchical_binning.hpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@
1313
#include <hibf/build/bin_size_in_bits.hpp> // for bin_size_in_bits
1414
#include <hibf/config.hpp> // for config
1515
#include <hibf/layout/data_store.hpp> // for data_store
16-
#include <hibf/platform.hpp> // for HIBF_WORKAROUND_GCC_BOGUS_MEMCPY
16+
#include <hibf/misc/md_vector.hpp>
17+
#include <hibf/platform.hpp> // for HIBF_WORKAROUND_GCC_BOGUS_MEMCPY
1718

1819
namespace seqan::hibf::layout
1920
{
@@ -140,9 +141,9 @@ class hierarchical_binning
140141
*
141142
* \image html hierarchical_dp_init.png
142143
*/
143-
void initialization(std::vector<std::vector<size_t>> & matrix,
144-
std::vector<std::vector<size_t>> & ll_matrix,
145-
std::vector<std::vector<std::pair<size_t, size_t>>> & trace);
144+
void initialization(md_vector<size_t> & matrix,
145+
md_vector<size_t> & ll_matrix,
146+
md_vector<std::pair<size_t, size_t>> & trace);
146147

147148
/*!\brief Performs the recursion.
148149
*
@@ -182,9 +183,8 @@ class hierarchical_binning
182183
* this algorithm. It would be too computationally intensive to compute the splitting for every possibility.
183184
*
184185
*/
185-
void recursion(std::vector<std::vector<size_t>> & matrix,
186-
std::vector<std::vector<size_t>> & ll_matrix,
187-
std::vector<std::vector<std::pair<size_t, size_t>>> & trace);
186+
void
187+
recursion(md_vector<size_t> & matrix, md_vector<size_t> & ll_matrix, md_vector<std::pair<size_t, size_t>> & trace);
188188

189189
void backtrack_merged_bin(size_t trace_j,
190190
size_t const next_j,
@@ -198,7 +198,7 @@ class hierarchical_binning
198198
maximum_bin_tracker & max_tracker);
199199

200200
//!\brief Backtracks the trace matrix and writes the resulting binning into the output file.
201-
size_t backtracking(std::vector<std::vector<std::pair<size_t, size_t>>> const & trace);
201+
size_t backtracking(md_vector<std::pair<size_t, size_t>> const & trace);
202202

203203
data_store initialise_libf_data(size_t const trace_j) const;
204204

include/hibf/misc/md_vector.hpp

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
// SPDX-FileCopyrightText: 2006-2024, Knut Reinert & Freie Universität Berlin
2+
// SPDX-FileCopyrightText: 2016-2024, Knut Reinert & MPI für molekulare Genetik
3+
// SPDX-License-Identifier: BSD-3-Clause
4+
5+
/*!\file
6+
* \brief Provides seqan::hibf::md_vector.
7+
* \author Enrico Seiler <enrico.seiler AT fu-berlin.de>
8+
*/
9+
10+
#pragma once
11+
12+
#include <vector> // for vector
13+
14+
#include <hibf/platform.hpp>
15+
16+
namespace seqan::hibf
17+
{
18+
19+
/*!\brief A vector of vectors that additionally offers a two-argument subscript operator.
 * \tparam value_t The element type stored in the inner vectors.
 * \details
 * Publicly derives from `std::vector<std::vector<value_t>>`, inheriting its constructors and its single-argument
 * `operator[]`. On top of that, `v[x, y]` is provided as a shorthand for `v[x][y]`.
 * The two-argument access forwards to nested `operator[]` calls and adds no bounds checking of its own.
 * NOTE(review): a subscript operator with more than one parameter is a C++23 language feature — confirm that every
 * translation unit including this header is built with a sufficiently recent standard.
 */
template <typename value_t>
20+
struct md_vector : public std::vector<std::vector<value_t>>
21+
{
22+
//!\brief The underlying nested vector type.
using base_t = std::vector<std::vector<value_t>>;
23+
//!\brief Inherit all constructors of the nested vector.
using base_t::base_t;
24+
// Declaring operator[] below would otherwise hide the base class overloads, so re-expose `v[x]`.
using base_t::operator[];
25+
// Preferred variant: with an explicit object parameter, a single template serves const and non-const objects.
#if defined(__cpp_explicit_this_parameter) && __cpp_explicit_this_parameter >= 202110L
26+
/*!\brief Returns the result of `self[x][y]`.
 * \param self The md_vector that is accessed; its constness is deduced.
 * \param x    Index into the outer vector.
 * \param y    Index into the inner vector at position `x`.
 * \details `decltype(auto)` keeps the exact type yielded by the nested subscript expression.
 */
decltype(auto) operator[](this auto & self, size_t const x, size_t const y)
27+
{
28+
return self[x][y];
29+
}
30+
// Fallback variant: separate non-const and const overloads with the same behaviour.
#else
31+
/*!\brief Returns a mutable reference to the element `(*this)[x][y]`.
 * \param x Index into the outer vector.
 * \param y Index into the inner vector at position `x`.
 */
value_t & operator[](size_t const x, size_t const y)
32+
{
33+
return (*this)[x][y];
34+
}
35+
/*!\brief Returns a const reference to the element `(*this)[x][y]`.
 * \param x Index into the outer vector.
 * \param y Index into the inner vector at position `x`.
 */
value_t const & operator[](size_t const x, size_t const y) const
36+
{
37+
return (*this)[x][y];
38+
}
39+
#endif
40+
};
41+
42+
} // namespace seqan::hibf

include/hibf/sketch/minhashes.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include <cereal/access.hpp> // for access
1818
#include <cereal/cereal.hpp> // for make_nvp, CEREAL_NVP
1919

20+
#include <hibf/misc/md_vector.hpp>
2021
#include <hibf/platform.hpp>
2122

2223
namespace seqan::hibf::sketch
@@ -39,7 +40,7 @@ struct minhashes
3940
static constexpr size_t sketch_size{40};
4041

4142
//!\brief A table of sketches. For LSH we need multiple sketches, stored in a table.
42-
std::vector<std::vector<uint64_t>> table{}; // Each element (vector<uint64_t>) is a minhash.
43+
md_vector<uint64_t> table{}; // Each element (vector<uint64_t>) is a minhash.
4344

4445
/*!\name Constructors, destructor and assignment
4546
* \{

src/layout/hierarchical_binning.cpp

Lines changed: 32 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -54,13 +54,13 @@ size_t hierarchical_binning::execute()
5454
}
5555

5656
// technical bins (outer) = rows; user bins (inner) = columns
57-
std::vector<std::vector<size_t>> matrix(num_technical_bins, std::vector<size_t>(num_user_bins, max_size_t));
57+
md_vector<size_t> matrix(num_technical_bins, std::vector<size_t>(num_user_bins, max_size_t));
5858

5959
// technical bins (outer) = rows; user bins (inner) = columns
60-
std::vector<std::vector<size_t>> ll_matrix(num_technical_bins, std::vector<size_t>(num_user_bins, 0u));
60+
md_vector<size_t> ll_matrix(num_technical_bins, std::vector<size_t>(num_user_bins, 0u));
6161

6262
// technical bins (outer) = rows; user bins (inner) = columns
63-
std::vector<std::vector<std::pair<size_t, size_t>>> trace(
63+
md_vector<std::pair<size_t, size_t>> trace(
6464
num_technical_bins,
6565
std::vector<std::pair<size_t, size_t>>(num_user_bins, {max_size_t, max_size_t}));
6666

@@ -87,9 +87,9 @@ size_t hierarchical_binning::execute()
8787
return static_cast<size_t>(std::ceil(levels));
8888
}
8989

90-
void hierarchical_binning::initialization(std::vector<std::vector<size_t>> & matrix,
91-
std::vector<std::vector<size_t>> & ll_matrix,
92-
std::vector<std::vector<std::pair<size_t, size_t>>> & trace)
90+
void hierarchical_binning::initialization(md_vector<size_t> & matrix,
91+
md_vector<size_t> & ll_matrix,
92+
md_vector<std::pair<size_t, size_t>> & trace)
9393
{
9494
assert(data != nullptr);
9595

@@ -99,8 +99,8 @@ void hierarchical_binning::initialization(std::vector<std::vector<size_t>> & mat
9999
for (size_t i = 0; i < num_technical_bins; ++i)
100100
{
101101
size_t const corrected_ub_cardinality = static_cast<size_t>(ub_cardinality * data->fpr_correction[i + 1]);
102-
matrix[i][0] = divide_and_ceil(corrected_ub_cardinality, i + 1u);
103-
trace[i][0] = {0u, 0u}; // unnecessary?
102+
matrix[i, 0] = divide_and_ceil(corrected_ub_cardinality, i + 1u);
103+
trace[i, 0] = {0u, 0u}; // unnecessary?
104104
}
105105

106106
// initialize first row
@@ -118,9 +118,9 @@ void hierarchical_binning::initialization(std::vector<std::vector<size_t>> & mat
118118
for (size_t j = 1; j < num_user_bins; ++j)
119119
{
120120
sum += (*data->kmer_counts)[data->positions[j]];
121-
matrix[0][j] = data->union_estimates[j];
122-
ll_matrix[0][j] = max_merge_levels(j + 1) * sum;
123-
trace[0][j] = {0u, j - 1}; // unnecessary?
121+
matrix[0, j] = data->union_estimates[j];
122+
ll_matrix[0, j] = max_merge_levels(j + 1) * sum;
123+
trace[0, j] = {0u, j - 1}; // unnecessary?
124124
}
125125
}
126126
else
@@ -130,16 +130,16 @@ void hierarchical_binning::initialization(std::vector<std::vector<size_t>> & mat
130130
assert(j < data->positions.size());
131131
assert(data->positions[j] < data->kmer_counts->size());
132132
sum += (*data->kmer_counts)[data->positions[j]];
133-
matrix[0][j] = sum;
134-
ll_matrix[0][j] = max_merge_levels(j + 1) * sum;
135-
trace[0][j] = {0u, j - 1}; // unnecessary?
133+
matrix[0, j] = sum;
134+
ll_matrix[0, j] = max_merge_levels(j + 1) * sum;
135+
trace[0, j] = {0u, j - 1}; // unnecessary?
136136
}
137137
}
138138
}
139139

140-
void hierarchical_binning::recursion(std::vector<std::vector<size_t>> & matrix,
141-
std::vector<std::vector<size_t>> & ll_matrix,
142-
std::vector<std::vector<std::pair<size_t, size_t>>> & trace)
140+
void hierarchical_binning::recursion(md_vector<size_t> & matrix,
141+
md_vector<size_t> & ll_matrix,
142+
md_vector<std::pair<size_t, size_t>> & trace)
143143
{
144144
assert(data != nullptr);
145145

@@ -182,23 +182,23 @@ void hierarchical_binning::recursion(std::vector<std::vector<size_t>> & matrix,
182182
size_t const corrected_ub_cardinality =
183183
static_cast<size_t>(ub_cardinality * data->fpr_correction[(i - i_prime)]);
184184
size_t score =
185-
std::max<size_t>(divide_and_ceil(corrected_ub_cardinality, i - i_prime), matrix[i_prime][j - 1]);
186-
size_t full_score = score * (i + 1) /*#TBs*/ + config.alpha * ll_matrix[i_prime][j - 1];
185+
std::max<size_t>(divide_and_ceil(corrected_ub_cardinality, i - i_prime), matrix[i_prime, j - 1]);
186+
size_t full_score = score * (i + 1) /*#TBs*/ + config.alpha * ll_matrix[i_prime, j - 1];
187187

188188
// std::cout << " ++ j:" << j << " i:" << i << " i':" << i_prime << " score:" << score << std::endl;
189189

190190
if (full_score < full_minimum)
191191
{
192192
minimum = score;
193193
full_minimum = full_score;
194-
trace[i][j] = {i_prime, j - 1};
195-
ll_matrix[i][j] = ll_matrix[i_prime][j - 1];
194+
trace[i, j] = {i_prime, j - 1};
195+
ll_matrix[i, j] = ll_matrix[i_prime, j - 1];
196196
}
197197
}
198198

199199
// seqan3::debug_stream << "current vertical minimum of " << "j:" << j << " i:" << i
200200
// << " -> score:" << full_minimum << " (M_ij=" << minimum << ")"
201-
// << " trace:" << trace[i][j]
201+
// << " trace:" << trace[i, j]
202202
// << std::endl;
203203

204204
// check horizontal cells
@@ -216,16 +216,16 @@ void hierarchical_binning::recursion(std::vector<std::vector<size_t>> & matrix,
216216

217217
// if the user bin j-1 was not split into multiple technical bins!
218218
// I may merge the current user bin j into the former
219-
while (j_prime != 0 && ((i - trace[i][j_prime].first) < 2) && get_weight() < minimum)
219+
while (j_prime != 0 && ((i - trace[i, j_prime].first) < 2) && get_weight() < minimum)
220220
{
221221
weight += (*data->kmer_counts)[data->positions[j_prime]];
222222
--j_prime;
223223

224224
// score: The current maximum technical bin size for the high-level IBF (score for the matrix M)
225225
// ll_kmers: estimate for the number of k-mers that have to be resolved on lower levels
226226
// full_score: The score to minimize -> score * #TB-high_level + low_level_memory footprint
227-
size_t const score = std::max<size_t>(matrix[i - 1][j_prime], get_weight());
228-
size_t const ll_kmers = ll_matrix[i - 1][j_prime] + max_merge_levels(j - j_prime) * weight;
227+
size_t const score = std::max<size_t>(matrix[i - 1, j_prime], get_weight());
228+
size_t const ll_kmers = ll_matrix[i - 1, j_prime] + max_merge_levels(j - j_prime) * weight;
229229
size_t const full_score = score * (i + 1) /*#TBs*/ + config.alpha * ll_kmers;
230230

231231
// seqan3::debug_stream << " -- " << "j_prime:" << j_prime
@@ -236,12 +236,12 @@ void hierarchical_binning::recursion(std::vector<std::vector<size_t>> & matrix,
236236
{
237237
minimum = score;
238238
full_minimum = full_score;
239-
trace[i][j] = {i - 1, j_prime};
240-
ll_matrix[i][j] = ll_kmers;
239+
trace[i, j] = {i - 1, j_prime};
240+
ll_matrix[i, j] = ll_kmers;
241241
}
242242
}
243243

244-
matrix[i][j] = minimum;
244+
matrix[i, j] = minimum;
245245
}
246246
}
247247
}
@@ -307,7 +307,7 @@ void hierarchical_binning::backtrack_split_bin(size_t trace_j,
307307
// std::cout << "split " << trace_j << " into " << number_of_bins << ": " << cardinality_per_bin << std::endl;
308308
}
309309

310-
size_t hierarchical_binning::backtracking(std::vector<std::vector<std::pair<size_t, size_t>>> const & trace)
310+
size_t hierarchical_binning::backtracking(md_vector<std::pair<size_t, size_t>> const & trace)
311311
{
312312
assert(data != nullptr);
313313

@@ -323,8 +323,8 @@ size_t hierarchical_binning::backtracking(std::vector<std::vector<std::pair<size
323323
while (trace_j > 0u && trace_i > 0u)
324324
{
325325
// std::cout << "\t I am now at " << trace_i << "," << trace_j << std::endl;
326-
size_t next_i = trace[trace_i][trace_j].first;
327-
size_t next_j = trace[trace_i][trace_j].second;
326+
size_t next_i = trace[trace_i, trace_j].first;
327+
size_t next_j = trace[trace_i, trace_j].second;
328328

329329
size_t number_of_bins = (trace_i - next_i);
330330

@@ -339,7 +339,7 @@ size_t hierarchical_binning::backtracking(std::vector<std::vector<std::pair<size
339339
{
340340
backtrack_split_bin(trace_j, number_of_bins, bin_id, max_tracker);
341341

342-
trace_i = trace[trace_i][trace_j].first;
342+
trace_i = trace[trace_i, trace_j].first;
343343
--trace_j;
344344
}
345345

0 commit comments

Comments
 (0)