Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci_linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ jobs:
strategy:
fail-fast: false
matrix:
compiler: ["clang-18", "clang-17", "gcc-14", "gcc-13", "gcc-12", "gcc-11", "intel"]
compiler: ["clang-18", "clang-17", "gcc-14", "gcc-13", "gcc-12", "intel"]
container:
image: ghcr.io/seqan/${{ matrix.compiler }}
volumes:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ci_macos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ jobs:
strategy:
fail-fast: false
matrix:
compiler: ["clang-18", "clang-17", "gcc-14", "gcc-13", "gcc-12", "gcc-11"]
compiler: ["clang-18", "clang-17", "gcc-14", "gcc-13", "gcc-12"]
steps:
- name: Checkout
uses: actions/checkout@v4
Expand Down
14 changes: 7 additions & 7 deletions .github/workflows/ci_misc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ jobs:
use_include_dependencies: "OFF"
cxx_flags: "-stdlib=libc++"

- name: "Snippet gcc11"
compiler: "gcc-11"
- name: "Snippet gcc12"
compiler: "gcc-12"
build: snippet
build_type: Release
use_include_dependencies: "OFF"
Expand All @@ -53,8 +53,8 @@ jobs:
use_include_dependencies: "OFF"
cxx_flags: "-stdlib=libc++"

- name: "Performance gcc11"
compiler: "gcc-11"
- name: "Performance gcc12"
compiler: "gcc-12"
build: performance
build_type: Release
use_include_dependencies: "OFF"
Expand All @@ -72,8 +72,8 @@ jobs:
build_type: Release
use_include_dependencies: "OFF"

- name: "Header gcc11"
compiler: "gcc-11"
- name: "Header gcc12"
compiler: "gcc-12"
build: header
build_type: Release
use_include_dependencies: "OFF"
Expand All @@ -85,7 +85,7 @@ jobs:
use_include_dependencies: "ON"

- name: "Tutorial"
compiler: "gcc-11"
compiler: "gcc-12"
build: tutorial
build_type: Debug
use_include_dependencies: "OFF"
Expand Down
4 changes: 2 additions & 2 deletions cmake/CPM.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
#
# SPDX-FileCopyrightText: Copyright (c) 2019-2023 Lars Melchior and contributors

set (CPM_DOWNLOAD_VERSION 0.38.6)
set (CPM_HASH_SUM "11c3fa5f1ba14f15d31c2fb63dbc8628ee133d81c8d764caad9a8db9e0bacb07")
set (CPM_DOWNLOAD_VERSION 0.40.2)
set (CPM_HASH_SUM "c8cdc32c03816538ce22781ed72964dc864b2a34a310d3b7104812a5ca2d835d")

if (CPM_SOURCE_CACHE)
set (CPM_DOWNLOAD_LOCATION "${CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
Expand Down
35 changes: 17 additions & 18 deletions cmake/configuration.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -79,43 +79,42 @@ include ("${HIBF_SOURCE_DIR}/test/cmake/hibf_require_ccache.cmake")
hibf_require_ccache ()

# ----------------------------------------------------------------------------
# Require C++20
# Require C++23
# ----------------------------------------------------------------------------

set (CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})

set (CXXSTD_TEST_SOURCE
"#if !defined (__cplusplus) || (__cplusplus < 202002)
#error NOCXX20
"#if !defined (__cplusplus) || (__cplusplus < 202100)
#error NOCXX23
#endif
int main() {}")

set (HIBF_FEATURE_CPP20_FLAG_BUILTIN "")
set (HIBF_FEATURE_CPP20_FLAG_STD20 "-std=c++20")
set (HIBF_FEATURE_CPP20_FLAG_STD2a "-std=c++2a")
set (HIBF_FEATURE_CPP23_FLAG_BUILTIN "")
set (HIBF_FEATURE_CPP23_FLAG_STD23 "-std=c++23")

set (HIBF_CPP20_FLAG "")
set (HIBF_CPP23_FLAG "")

foreach (_FLAG BUILTIN STD20 STD2a)
set (CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS_SAVE} ${HIBF_FEATURE_CPP20_FLAG_${_FLAG}}")
foreach (_FLAG BUILTIN STD23)
set (CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS_SAVE} ${HIBF_FEATURE_CPP23_FLAG_${_FLAG}}")

check_cxx_source_compiles ("${CXXSTD_TEST_SOURCE}" CPP20_FLAG_${_FLAG})
check_cxx_source_compiles ("${CXXSTD_TEST_SOURCE}" CPP23_FLAG_${_FLAG})

if (CPP20_FLAG_${_FLAG})
set (HIBF_CPP20_FLAG ${_FLAG})
if (CPP23_FLAG_${_FLAG})
set (HIBF_CPP23_FLAG ${_FLAG})
break ()
endif ()
endforeach ()

set (CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})

if (HIBF_CPP20_FLAG STREQUAL "BUILTIN")
hibf_config_print ("C++ Standard-20 support: builtin")
elseif (HIBF_CPP20_FLAG)
set (HIBF_CXX_FLAGS "${HIBF_CXX_FLAGS} ${HIBF_FEATURE_CPP20_FLAG_${HIBF_CPP20_FLAG}}")
hibf_config_print ("C++ Standard-20 support: via ${HIBF_FEATURE_CPP20_FLAG_${HIBF_CPP20_FLAG}}")
if (HIBF_CPP23_FLAG STREQUAL "BUILTIN")
hibf_config_print ("C++ Standard-23 support: builtin")
elseif (HIBF_CPP23_FLAG)
set (HIBF_CXX_FLAGS "${HIBF_CXX_FLAGS} ${HIBF_FEATURE_CPP23_FLAG_${HIBF_CPP23_FLAG}}")
hibf_config_print ("C++ Standard-23 support: via ${HIBF_FEATURE_CPP23_FLAG_${HIBF_CPP23_FLAG}}")
else ()
hibf_config_error ("HIBF requires C++20, but your compiler does not support it.")
hibf_config_error ("HIBF requires C++23, but your compiler does not support it.")
endif ()

# ----------------------------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion cmake/package-lock.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ CPMDeclarePackage (benchmark
# googletest
set (HIBF_GOOGLETEST_VERSION 1.14.0)
CPMDeclarePackage (googletest
NAME GTest
NAME googletest
VERSION ${HIBF_GOOGLETEST_VERSION}
GITHUB_REPOSITORY google/googletest
SYSTEM TRUE
Expand Down
21 changes: 11 additions & 10 deletions include/hibf/hierarchical_interleaved_bloom_filter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@
#include <hibf/interleaved_bloom_filter.hpp> // for interleaved_bloom_filter
#include <hibf/layout/layout.hpp> // for layout
#include <hibf/misc/counting_vector.hpp> // for counting_vector
#include <hibf/misc/timer.hpp> // for concurrent_timer
#include <hibf/platform.hpp> // for HIBF_CONSTEXPR_VECTOR
#include <hibf/misc/md_vector.hpp>
#include <hibf/misc/timer.hpp> // for concurrent_timer
#include <hibf/platform.hpp> // for HIBF_CONSTEXPR_VECTOR

namespace seqan::hibf
{
Expand Down Expand Up @@ -202,7 +203,7 @@ class hierarchical_interleaved_bloom_filter
* If `j != i` is returned, there is a lower level IBF, bin `b` is a merged bin, and `j` is the ID of the lower
* level IBF in ibf_vector.
*/
std::vector<std::vector<int64_t>> next_ibf_id;
md_vector<int64_t> next_ibf_id;

/*!\brief Stores for each bin in each IBF of the HIBF the user bin ID.
* \details
Expand All @@ -211,7 +212,7 @@ class hierarchical_interleaved_bloom_filter
* lower level IBF.
* Otherwise, the returned value `j` is the corresponding user bin ID.
*/
std::vector<std::vector<int64_t>> ibf_bin_to_user_bin_id{};
md_vector<int64_t> ibf_bin_to_user_bin_id{};

//!\brief Returns a membership_agent to be used for counting.
membership_agent_type membership_agent() const;
Expand Down Expand Up @@ -280,16 +281,16 @@ class hierarchical_interleaved_bloom_filter::membership_agent_type
{
sum += result[bin];

auto const current_filename_index = hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx][bin];
auto const current_filename_index = hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx, bin];

if (current_filename_index < 0) // merged bin
{
if (sum >= threshold)
membership_for_impl(values, hibf_ptr->next_ibf_id[ibf_idx][bin], threshold);
membership_for_impl(values, hibf_ptr->next_ibf_id[ibf_idx, bin], threshold);
sum = 0u;
}
else if (bin + 1u == result.size() || // last bin
current_filename_index != hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx][bin + 1]) // end of split bin
current_filename_index != hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx, bin + 1]) // end of split bin
{
if (sum >= threshold)
result_buffer.emplace_back(current_filename_index);
Expand Down Expand Up @@ -415,16 +416,16 @@ class hierarchical_interleaved_bloom_filter::counting_agent_type
for (size_t bin{}; bin < result.size(); ++bin)
{
sum += result[bin];
auto const current_filename_index = hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx][bin];
auto const current_filename_index = hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx, bin];

if (current_filename_index < 0) // merged bin
{
if (sum >= threshold)
bulk_count_impl(values, hibf_ptr->next_ibf_id[ibf_idx][bin], threshold);
bulk_count_impl(values, hibf_ptr->next_ibf_id[ibf_idx, bin], threshold);
sum = 0u;
}
else if (bin + 1u == result.size() || // last bin
current_filename_index != hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx][bin + 1]) // end of split bin
current_filename_index != hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx, bin + 1]) // end of split bin
{
if (sum >= threshold)
result_buffer[current_filename_index] = sum;
Expand Down
16 changes: 8 additions & 8 deletions include/hibf/layout/hierarchical_binning.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
#include <hibf/build/bin_size_in_bits.hpp> // for bin_size_in_bits
#include <hibf/config.hpp> // for config
#include <hibf/layout/data_store.hpp> // for data_store
#include <hibf/platform.hpp> // for HIBF_WORKAROUND_GCC_BOGUS_MEMCPY
#include <hibf/misc/md_vector.hpp>
#include <hibf/platform.hpp> // for HIBF_WORKAROUND_GCC_BOGUS_MEMCPY

namespace seqan::hibf::layout
{
Expand Down Expand Up @@ -140,9 +141,9 @@ class hierarchical_binning
*
* \image html hierarchical_dp_init.png
*/
void initialization(std::vector<std::vector<size_t>> & matrix,
std::vector<std::vector<size_t>> & ll_matrix,
std::vector<std::vector<std::pair<size_t, size_t>>> & trace);
void initialization(md_vector<size_t> & matrix,
md_vector<size_t> & ll_matrix,
md_vector<std::pair<size_t, size_t>> & trace);

/*!\brief Performs the recursion.
*
Expand Down Expand Up @@ -182,9 +183,8 @@ class hierarchical_binning
* this algorithm. It would be too computational intensive to compute the splitting for every possibility.
*
*/
void recursion(std::vector<std::vector<size_t>> & matrix,
std::vector<std::vector<size_t>> & ll_matrix,
std::vector<std::vector<std::pair<size_t, size_t>>> & trace);
void
recursion(md_vector<size_t> & matrix, md_vector<size_t> & ll_matrix, md_vector<std::pair<size_t, size_t>> & trace);

void backtrack_merged_bin(size_t trace_j,
size_t const next_j,
Expand All @@ -198,7 +198,7 @@ class hierarchical_binning
maximum_bin_tracker & max_tracker);

//!\brief Backtracks the trace matrix and writes the resulting binning into the output file.
size_t backtracking(std::vector<std::vector<std::pair<size_t, size_t>>> const & trace);
size_t backtracking(md_vector<std::pair<size_t, size_t>> const & trace);

data_store initialise_libf_data(size_t const trace_j) const;

Expand Down
42 changes: 42 additions & 0 deletions include/hibf/misc/md_vector.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// SPDX-FileCopyrightText: 2006-2024, Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2024, Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: BSD-3-Clause

/*!\file
* \brief Provides seqan::hibf::md_vector.
* \author Enrico Seiler <enrico.seiler AT fu-berlin.de>
*/

#pragma once

#include <vector> // for vector

#include <hibf/platform.hpp>

namespace seqan::hibf
{

template <typename value_t>
struct md_vector : public std::vector<std::vector<value_t>>
{
using base_t = std::vector<std::vector<value_t>>;
using base_t::base_t;
using base_t::operator[];
#if defined(__cpp_explicit_this_parameter) && __cpp_explicit_this_parameter >= 202110L
decltype(auto) operator[](this auto & self, size_t const x, size_t const y)
{
return self[x][y];
}
#else
value_t & operator[](size_t const x, size_t const y)
{
return (*this)[x][y];
}
value_t const & operator[](size_t const x, size_t const y) const
{
return (*this)[x][y];
}
#endif
};

} // namespace seqan::hibf
15 changes: 5 additions & 10 deletions include/hibf/platform.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,9 @@
// ============================================================================

#if HIBF_COMPILER_IS_GCC
# if (__GNUC__ < 11)
# error "At least GCC 11 is needed."
# endif // (__GNUC__ < 11)

# if (__GNUC__ == 11 && __GNUC_MINOR__ <= 3)
# pragma warning "Be aware that GCC < 11.4 might have bugs that cause compile failure."
# endif // (__GNUC__ == 11 && __GNUC_MINOR__ <= 3)
# if (__GNUC__ < 12)
# error "At least GCC 12 is needed."
# endif // (__GNUC__ < 12)

# if (__GNUC__ == 12 && __GNUC_MINOR__ <= 2)
# pragma warning "Be aware that GCC < 12.3 might have bugs that cause compile failure."
Expand Down Expand Up @@ -100,10 +96,9 @@
#endif

// C++ standard [required]
// Note: gcc10 -std=c++20 still defines __cplusplus=201709
#ifdef __cplusplus
# if (__cplusplus < 201709)
# error "C++20 is required, make sure that you have set -std=c++20."
# if (__cplusplus < 202100)
# error "C++23 is required, make sure that you have set -std=c++23."
# endif
#else
# error "This is not a C++ compiler."
Expand Down
3 changes: 2 additions & 1 deletion include/hibf/sketch/minhashes.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <cereal/access.hpp> // for access
#include <cereal/cereal.hpp> // for make_nvp, CEREAL_NVP

#include <hibf/misc/md_vector.hpp>
#include <hibf/platform.hpp>

namespace seqan::hibf::sketch
Expand All @@ -39,7 +40,7 @@ struct minhashes
static constexpr size_t sketch_size{40};

//!\brief A table of sketches. For LSH we need multiple sketches, stored in a table.
std::vector<std::vector<uint64_t>> table{}; // Each element (vector<uint64_t>) is a minhash.
md_vector<uint64_t> table{}; // Each element (vector<uint64_t>) is a minhash.

/*!\name Constructors, destructor and assignment
* \{
Expand Down
Loading