diff --git a/CMakeLists.txt b/CMakeLists.txt
index b12b124c..8202750f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -57,5 +57,10 @@ install(FILES
 
 enable_testing()
 
+option(ENABLE_BENCH "Set to on in order to compile bench suite, work only in release mode" OFF)
+
 add_subdirectory(simdpp)
 add_subdirectory(test)
+if(ENABLE_BENCH)
+    add_subdirectory(bench)
+endif()
diff --git a/bench/CMakeLists.txt b/bench/CMakeLists.txt
new file mode 100644
index 00000000..53f393a9
--- /dev/null
+++ b/bench/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_subdirectory(thirdparty)
+add_subdirectory(insn)
\ No newline at end of file
diff --git a/bench/insn/CMakeLists.txt b/bench/insn/CMakeLists.txt
new file mode 100644
index 00000000..01c3b299
--- /dev/null
+++ b/bench/insn/CMakeLists.txt
@@ -0,0 +1,51 @@
+include_directories(${libsimdpp_SOURCE_DIR})
+include_directories(${GOOGLE_BENCHMARK_INCLUDE_DIRS})
+
+set(TEST_BENCH_SOURCES
+    main.cc
+    main.h
+)
+
+set(BENCH_INSN_ARCH_SOURCES
+    algorithm/transform_unary.cc
+    algorithm/transform_binary.cc
+    algorithm/reduce_unary.cc
+    algorithm/reduce_binary.cc
+    load_store.cc
+)
+
+set_property(GLOBAL PROPERTY USE_FOLDERS ON)
+foreach(ARCH ${COMPILABLE_ARCHS})
+    simdpp_get_arch_info(CXX_FLAGS DEFINES_LIST SUFFIX ${ARCH})
+    #message("Create benchmark for arch : ${SUFFIX} with flags: ${CXX_FLAGS} with defines ${DEFINES_LIST}")
+    set(exename "bench_insn_${SUFFIX}")
+    add_executable(${exename} ${BENCH_INSN_ARCH_SOURCES} ${TEST_BENCH_SOURCES})
+    set_target_properties(${exename} PROPERTIES COMPILE_FLAGS "${CXX_FLAGS}") # per-arch flags; the branches below append to them instead of replacing them
+    set_target_properties(${exename} PROPERTIES FOLDER bench)
+    if(SIMDPP_MSVC)
+        if(CMAKE_SIZEOF_VOID_P EQUAL 4)
+            # enable _vectorcall on i386 builds (only works on MSVC 2013)
+            #set_target_properties(${exename} PROPERTIES COMPILE_FLAGS "${CXX_FLAGS} /Gv")
+        endif()
+    elseif(SIMDPP_MSVC_INTEL)
+        set_target_properties(${exename} PROPERTIES COMPILE_FLAGS "${CXX_FLAGS} /Qstd=c++11")
+    else()
+        # Xcode clang linker spends very long time in deduplication pass when
+        # linking the test executable unless -fvisibility-inlines-hidden is passed.
+        set_target_properties(${exename} PROPERTIES COMPILE_FLAGS "${CXX_FLAGS} -std=c++11 -O2 -Wall -Wextra -fvisibility-inlines-hidden")
+    endif()
+    if(WIN32)
+        target_link_libraries(${exename}
+            PUBLIC benchmark
+            PUBLIC shlwapi.lib
+        )
+    else()
+        target_link_libraries(${exename}
+            PUBLIC benchmark
+            PUBLIC pthread
+        )
+        add_dependencies(${exename} ${GOOGLE_BENCHMARK})
+    endif()
+endforeach()
+
+
diff --git a/bench/insn/algorithm/reduce_binary.cc b/bench/insn/algorithm/reduce_binary.cc
new file mode 100644
index 00000000..82c0eb74
--- /dev/null
+++ b/bench/insn/algorithm/reduce_binary.cc
@@ -0,0 +1,155 @@
+/* Copyright (C) 2018 Povilas Kanapickas
+   Copyright (C) 2018 Thomas Retornaz
+
+   Distributed under the Boost Software License, Version 1.0.
+   (See accompanying file LICENSE_1_0.txt or copy at
+   http://www.boost.org/LICENSE_1_0.txt)
+*/
+
+#include "benchmark/benchmark.h"
+#include <algorithm>
+#include <numeric>
+#include <vector>
+#include <simdpp/simd.h>
+// Algorithm under test. NOTE(review): include targets and template argument lists in this file are reconstructed from usage - confirm against upstream.
+#include <simdpp/algorithm/reduce.h>
+
+// Benchmarks simdpp::reduce called with (init, neutral, binary operator) against std::accumulate with the same operator.
+namespace {
+// Addition functor: one overload for operands of type T, one generic overload for any other operand type U.
+template<typename T>
+struct BinaryOpPlus
+{
+public:
+    BinaryOpPlus() {}
+    SIMDPP_INL T operator()(T const &a0, T const &a1) const SIMDPP_NOEXCEPT
+    {
+        return a0 + a1;
+    }
+
+    template<typename U>
+    SIMDPP_INL U operator()(U const &a0, U const &a1) const SIMDPP_NOEXCEPT
+    {
+        return a0 + a1;
+    }
+};
+// Nullary generator that hands back the same stored value on every call.
+template<typename T>
+struct GeneratorConstant
+{
+    explicit GeneratorConstant(T constant) : m_constant(constant) {}
+    T operator()() { return m_constant; }
+    T m_constant;
+};
+
+// Returns a vector of `size` elements, each produced by one call to `gen()`.
+template<typename T, typename Generator>
+std::vector<T, simdpp::aligned_allocator<T, simdpp::simd_traits<T>::alignment>> DataGenerator(std::size_t size, Generator gen)
+{
+    // Same vector type the fixture below stores the result in.
+    using vector_aligned_t = std::vector<T, simdpp::aligned_allocator<T, simdpp::simd_traits<T>::alignment>>;
+    vector_aligned_t input(size);
+    std::generate(input.begin(), input.end(), gen);
+    return input;
+}
+
+/*********************BINARY****************************/
+// Fixture: SetUp() fills m_inputvect with st.range(0) copies of 1, TearDown() clears it.
+template<typename T>
+class ReduceBinaryFixture : public ::benchmark::Fixture {
+public:
+    void SetUp(const ::benchmark::State& st)
+    {
+        m_inputvect = DataGenerator<T, GeneratorConstant<T>>(static_cast<std::size_t>(st.range(0)), GeneratorConstant<T>(1));
+    }
+    void TearDown(const ::benchmark::State&)
+    {
+        m_inputvect.clear();
+    }
+    using vector_aligned_t = std::vector<T, simdpp::aligned_allocator<T, simdpp::simd_traits<T>::alignment>>;
+    vector_aligned_t m_inputvect;
+};
+
+// uint64_t: simdpp::reduce vs. std::accumulate, both summing with BinaryOpPlus
+BENCHMARK_TEMPLATE_DEFINE_F(ReduceBinaryFixture, BinaryUNINT64_SIMD_Test, uint64_t)(benchmark::State& st)
+{
+    // The input length was fixed by SetUp() from st.range(0).
+    uint64_t init = 0;
+    auto opPlus = BinaryOpPlus<uint64_t>();
+    uint64_t neutral = 0;
+    while (st.KeepRunning())
+    {
+        benchmark::DoNotOptimize(simdpp::reduce(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), init, neutral, opPlus));
+    }
+}
+BENCHMARK_REGISTER_F(ReduceBinaryFixture, BinaryUNINT64_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
+
+BENCHMARK_TEMPLATE_DEFINE_F(ReduceBinaryFixture, BinaryUNINT64_STD_Test, uint64_t)(benchmark::State& st)
+{
+    // The input length was fixed by SetUp() from st.range(0).
+    uint64_t init = 0;
+    auto opPlus = BinaryOpPlus<uint64_t>();
+    while (st.KeepRunning())
+    {
+        benchmark::DoNotOptimize(std::accumulate(m_inputvect.cbegin(), m_inputvect.cend(), init, opPlus));
+    }
+}
+BENCHMARK_REGISTER_F(ReduceBinaryFixture, BinaryUNINT64_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
+
+
+// float: simdpp::reduce vs. std::accumulate, both summing with BinaryOpPlus
+BENCHMARK_TEMPLATE_DEFINE_F(ReduceBinaryFixture, BinaryFLOAT_SIMD_Test, float)(benchmark::State& st)
+{
+    // The input length was fixed by SetUp() from st.range(0).
+    float init = 0;
+    auto opPlus = BinaryOpPlus<float>();
+    float neutral = 0;
+    while (st.KeepRunning())
+    {
+        benchmark::DoNotOptimize(simdpp::reduce(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), init, neutral, opPlus));
+    }
+}
+BENCHMARK_REGISTER_F(ReduceBinaryFixture, BinaryFLOAT_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
+
+BENCHMARK_TEMPLATE_DEFINE_F(ReduceBinaryFixture, BinaryFLOAT_STD_Test, float)(benchmark::State& st)
+{
+    // The input length was fixed by SetUp() from st.range(0).
+    float init = 0;
+    auto opPlus = BinaryOpPlus<float>();
+    while (st.KeepRunning())
+    {
+        benchmark::DoNotOptimize(std::accumulate(m_inputvect.cbegin(), m_inputvect.cend(), init, opPlus));
+    }
+}
+BENCHMARK_REGISTER_F(ReduceBinaryFixture, BinaryFLOAT_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
+
+
+// double: simdpp::reduce vs. std::accumulate, both summing with BinaryOpPlus
+BENCHMARK_TEMPLATE_DEFINE_F(ReduceBinaryFixture, BinaryDOUBLE_SIMD_Test, double)(benchmark::State& st)
+{
+    // The input length was fixed by SetUp() from st.range(0).
+    double init = 0;
+    auto opPlus = BinaryOpPlus<double>();
+    double neutral = 0;
+    while (st.KeepRunning())
+    {
+        benchmark::DoNotOptimize(simdpp::reduce(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), init, neutral, opPlus));
+    }
+}
+BENCHMARK_REGISTER_F(ReduceBinaryFixture, BinaryDOUBLE_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
+
+BENCHMARK_TEMPLATE_DEFINE_F(ReduceBinaryFixture, BinaryDOUBLE_STD_Test, double)(benchmark::State& st)
+{
+    // The input length was fixed by SetUp() from st.range(0).
+    double init = 0;
+    auto opPlus = BinaryOpPlus<double>();
+    while (st.KeepRunning())
+    {
+        benchmark::DoNotOptimize(std::accumulate(m_inputvect.cbegin(), m_inputvect.cend(), init, opPlus));
+    }
+}
+BENCHMARK_REGISTER_F(ReduceBinaryFixture, BinaryDOUBLE_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
+
+
+
+} // namespace
\ No newline at end of file
diff --git a/bench/insn/algorithm/reduce_unary.cc b/bench/insn/algorithm/reduce_unary.cc
new file mode 100644
index 00000000..de7546d8
--- /dev/null
+++ b/bench/insn/algorithm/reduce_unary.cc
@@ -0,0 +1,122 @@
+/* Copyright (C) 2018 Povilas Kanapickas
+   Copyright (C) 2018 Thomas Retornaz
+
+   Distributed under the Boost Software License, Version 1.0.
+   (See accompanying file LICENSE_1_0.txt or copy at
+   http://www.boost.org/LICENSE_1_0.txt)
+*/
+
+#include "benchmark/benchmark.h"
+#include <algorithm>
+#include <numeric>
+#include <vector>
+#include <simdpp/simd.h>
+// Algorithm under test. NOTE(review): include targets and template argument lists in this file are reconstructed from usage - confirm against upstream.
+#include <simdpp/algorithm/reduce.h>
+
+// Benchmarks the three-argument simdpp::reduce(first, last, init) against std::accumulate(first, last, init).
+namespace {
+// Nullary generator that hands back the same stored value on every call.
+template<typename T>
+struct GeneratorConstant
+{
+    explicit GeneratorConstant(T constant) : m_constant(constant) {}
+    T operator()() { return m_constant; }
+    T m_constant;
+};
+
+// Returns a vector of `size` elements, each produced by one call to `gen()`.
+template<typename T, typename Generator>
+std::vector<T, simdpp::aligned_allocator<T, simdpp::simd_traits<T>::alignment>> DataGenerator(std::size_t size, Generator gen)
+{
+    // Same vector type the fixture below stores the result in.
+    using vector_aligned_t = std::vector<T, simdpp::aligned_allocator<T, simdpp::simd_traits<T>::alignment>>;
+    vector_aligned_t input(size);
+    std::generate(input.begin(), input.end(), gen);
+    return input;
+}
+
+/*********************UNARY****************************/
+// Fixture: SetUp() fills m_inputvect with st.range(0) copies of 1, TearDown() clears it.
+template<typename T>
+class ReduceUnaryFixture : public ::benchmark::Fixture {
+public:
+    void SetUp(const ::benchmark::State& st)
+    {
+        m_inputvect = DataGenerator<T, GeneratorConstant<T>>(static_cast<std::size_t>(st.range(0)), GeneratorConstant<T>(1));
+    }
+    void TearDown(const ::benchmark::State&)
+    {
+        m_inputvect.clear();
+    }
+    using vector_aligned_t = std::vector<T, simdpp::aligned_allocator<T, simdpp::simd_traits<T>::alignment>>;
+    vector_aligned_t m_inputvect;
+};
+
+// uint64_t: simdpp::reduce vs. std::accumulate, initial value 0
+BENCHMARK_TEMPLATE_DEFINE_F(ReduceUnaryFixture, UnaryUNINT64_SIMD_Test, uint64_t)(benchmark::State& st)
+{
+    // The input length was fixed by SetUp() from st.range(0).
+    while (st.KeepRunning())
+    {
+        benchmark::DoNotOptimize(simdpp::reduce(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), (uint64_t)0));
+    }
+}
+BENCHMARK_REGISTER_F(ReduceUnaryFixture, UnaryUNINT64_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
+
+BENCHMARK_TEMPLATE_DEFINE_F(ReduceUnaryFixture, UnaryUNINT64_STD_Test, uint64_t)(benchmark::State& st)
+{
+    // The input length was fixed by SetUp() from st.range(0).
+    while (st.KeepRunning())
+    {
+        benchmark::DoNotOptimize(std::accumulate(m_inputvect.begin(), m_inputvect.end(), (uint64_t)0));
+    }
+}
+BENCHMARK_REGISTER_F(ReduceUnaryFixture, UnaryUNINT64_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
+
+
+// float: simdpp::reduce vs. std::accumulate, initial value 0
+BENCHMARK_TEMPLATE_DEFINE_F(ReduceUnaryFixture, UnaryFLOAT_SIMD_Test, float)(benchmark::State& st)
+{
+    // The input length was fixed by SetUp() from st.range(0).
+    while (st.KeepRunning())
+    {
+        benchmark::DoNotOptimize(simdpp::reduce(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), (float)0));
+    }
+}
+BENCHMARK_REGISTER_F(ReduceUnaryFixture, UnaryFLOAT_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
+
+BENCHMARK_TEMPLATE_DEFINE_F(ReduceUnaryFixture, UnaryFLOAT_STD_Test, float)(benchmark::State& st)
+{
+    // The input length was fixed by SetUp() from st.range(0).
+    while (st.KeepRunning())
+    {
+        benchmark::DoNotOptimize(std::accumulate(m_inputvect.begin(), m_inputvect.end(), (float)0));
+    }
+}
+BENCHMARK_REGISTER_F(ReduceUnaryFixture, UnaryFLOAT_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
+
+// double: simdpp::reduce vs. std::accumulate, initial value 0
+BENCHMARK_TEMPLATE_DEFINE_F(ReduceUnaryFixture, UnaryDOUBLE_SIMD_Test, double)(benchmark::State& st)
+{
+    // The input length was fixed by SetUp() from st.range(0).
+    while (st.KeepRunning())
+    {
+        benchmark::DoNotOptimize(simdpp::reduce(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), (double)0));
+    }
+}
+BENCHMARK_REGISTER_F(ReduceUnaryFixture, UnaryDOUBLE_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
+
+BENCHMARK_TEMPLATE_DEFINE_F(ReduceUnaryFixture, UnaryDOUBLE_STD_Test, double)(benchmark::State& st)
+{
+    // The input length was fixed by SetUp() from st.range(0).
+    while (st.KeepRunning())
+    {
+        benchmark::DoNotOptimize(std::accumulate(m_inputvect.begin(), m_inputvect.end(), (double)0));
+    }
+}
+BENCHMARK_REGISTER_F(ReduceUnaryFixture, UnaryDOUBLE_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
+
+
+
+} // namespace
\ No newline at end of file
diff --git a/bench/insn/algorithm/transform_binary.cc b/bench/insn/algorithm/transform_binary.cc
new file mode 100644
index 00000000..5ffe1a6e
--- /dev/null
+++ b/bench/insn/algorithm/transform_binary.cc
@@ -0,0 +1,209 @@
+/* Copyright (C) 2018 Povilas Kanapickas
+   Copyright (C) 2018 Thomas Retornaz
+
+   Distributed under the Boost Software License,
Version 1.0.
+   (See accompanying file LICENSE_1_0.txt or copy at
+   http://www.boost.org/LICENSE_1_0.txt)
+*/
+
+#include "benchmark/benchmark.h"
+#include <algorithm>
+#include <numeric>
+#include <vector>
+#include <simdpp/simd.h>
+// Algorithm under test. NOTE(review): include targets and template argument lists in this file are reconstructed from usage - confirm against upstream.
+#include <simdpp/algorithm/transform.h>
+
+// Benchmarks the two-input simdpp::transform against the two-input std::transform, both using BinaryOpAdd.
+namespace {
+// Addition functor: one overload for operands of type T, one generic overload for any other operand type U.
+template<typename T>
+struct BinaryOpAdd
+{
+public:
+    BinaryOpAdd() {}
+    SIMDPP_INL T operator()(T const &a0, T const &a1) const SIMDPP_NOEXCEPT
+    {
+        return a0 + a1;
+    }
+
+    template<typename U>
+    SIMDPP_INL U operator()(U const &a0, U const &a1) const SIMDPP_NOEXCEPT
+    {
+        using namespace simdpp;
+        return a0 + a1;
+    }
+};
+
+// Nullary generator that hands back the same stored value on every call.
+template<typename T>
+struct GeneratorConstant
+{
+    explicit GeneratorConstant(T constant) : m_constant(constant) {}
+    T operator()() { return m_constant; }
+    T m_constant;
+};
+
+// Returns a vector of `size` elements, each produced by one call to `gen()`.
+template<typename T, typename Generator>
+std::vector<T, simdpp::aligned_allocator<T, simdpp::simd_traits<T>::alignment>> DataGenerator(std::size_t size, Generator gen)
+{
+    // Same vector type the fixture below stores the result in.
+    using vector_aligned_t = std::vector<T, simdpp::aligned_allocator<T, simdpp::simd_traits<T>::alignment>>;
+    vector_aligned_t input(size);
+    std::generate(input.begin(), input.end(), gen);
+    return input;
+}
+
+/*********************Binary****************************/
+// Fixture: SetUp() fills both inputs with st.range(0) copies of 42 and sizes the output to match; TearDown() clears all three.
+template<typename T>
+class TransformBinaryFixture : public ::benchmark::Fixture {
+public:
+    void SetUp(const ::benchmark::State& st)
+    {
+        m_inputvect = DataGenerator<T, GeneratorConstant<T>>(static_cast<std::size_t>(st.range(0)), GeneratorConstant<T>(42));
+        m_inputvect2 = DataGenerator<T, GeneratorConstant<T>>(static_cast<std::size_t>(st.range(0)), GeneratorConstant<T>(42));
+        m_outputvect.resize(static_cast<std::size_t>(st.range(0)));
+    }
+    void TearDown(const ::benchmark::State&)
+    {
+        m_inputvect.clear();
+        m_inputvect2.clear();
+        m_outputvect.clear();
+    }
+    using vector_aligned_t = std::vector<T, simdpp::aligned_allocator<T, simdpp::simd_traits<T>::alignment>>;
+    vector_aligned_t m_inputvect;
+    vector_aligned_t m_inputvect2;
+    vector_aligned_t m_outputvect;
+    BinaryOpAdd<T> opPlus = BinaryOpAdd<T>();
+};
+
+// uint8_t: simdpp::transform vs. std::transform over (m_inputvect, m_inputvect2) -> m_outputvect
+BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryUNINT8_SIMD_Test, uint8_t)(benchmark::State& st)
+{
+    // All three vectors were sized by SetUp() from st.range(0).
+    while (st.KeepRunning())
+    {
+        benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_inputvect2.data(), m_outputvect.data(), opPlus));
+    }
+}
+BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryUNINT8_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
+
+BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryUNINT8_STD_Test, uint8_t)(benchmark::State& st)
+{
+    // Second operand is m_inputvect2, the same pair of ranges the SIMD benchmark adds.
+    while (st.KeepRunning())
+    {
+        benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_inputvect2.begin(), m_outputvect.begin(), opPlus));
+    }
+}
+BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryUNINT8_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
+
+// uint16_t: simdpp::transform vs. std::transform over (m_inputvect, m_inputvect2) -> m_outputvect
+BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryUNINT16_SIMD_Test, uint16_t)(benchmark::State& st)
+{
+    // All three vectors were sized by SetUp() from st.range(0).
+    while (st.KeepRunning())
+    {
+        benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_inputvect2.data(), m_outputvect.data(), opPlus));
+    }
+}
+BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryUNINT16_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
+
+BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryUNINT16_STD_Test, uint16_t)(benchmark::State& st)
+{
+    // Second operand is m_inputvect2, the same pair of ranges the SIMD benchmark adds.
+    while (st.KeepRunning())
+    {
+        benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_inputvect2.begin(), m_outputvect.begin(), opPlus));
+    }
+}
+BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryUNINT16_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
+
+// uint32_t: simdpp::transform vs. std::transform over (m_inputvect, m_inputvect2) -> m_outputvect
+BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryUNINT32_SIMD_Test, uint32_t)(benchmark::State& st)
+{
+    // All three vectors were sized by SetUp() from st.range(0).
+    while (st.KeepRunning())
+    {
+        benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_inputvect2.data(), m_outputvect.data(), opPlus));
+    }
+}
+BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryUNINT32_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
+
+BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryUNINT32_STD_Test, uint32_t)(benchmark::State& st)
+{
+    // Second operand is m_inputvect2, the same pair of ranges the SIMD benchmark adds.
+    while (st.KeepRunning())
+    {
+        benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_inputvect2.begin(), m_outputvect.begin(), opPlus));
+    }
+}
+BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryUNINT32_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
+
+// uint64_t: simdpp::transform vs. std::transform over (m_inputvect, m_inputvect2) -> m_outputvect
+BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryUNINT64_SIMD_Test, uint64_t)(benchmark::State& st)
+{
+    // All three vectors were sized by SetUp() from st.range(0).
+    while (st.KeepRunning())
+    {
+        benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_inputvect2.data(), m_outputvect.data(), opPlus));
+    }
+}
+BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryUNINT64_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
+
+BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryUNINT64_STD_Test, uint64_t)(benchmark::State& st)
+{
+    // Second operand is m_inputvect2, the same pair of ranges the SIMD benchmark adds.
+    while (st.KeepRunning())
+    {
+        benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_inputvect2.begin(), m_outputvect.begin(), opPlus));
+    }
+}
+BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryUNINT64_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
+
+// float: simdpp::transform over (m_inputvect, m_inputvect2) -> m_outputvect
+BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryFloat_SIMD_Test, float)(benchmark::State& st)
+{
+    // All three vectors were sized by SetUp() from st.range(0).
+    while (st.KeepRunning())
+    {
+        benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_inputvect2.data(), m_outputvect.data(), opPlus));
+    }
+}
+BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryFloat_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
+
+
+BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryFloat_STD_Test, float)(benchmark::State& st)
+{
+    // Second operand is m_inputvect2, the same pair of ranges the SIMD benchmark adds.
+    while (st.KeepRunning())
+    {
+        benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_inputvect2.begin(), m_outputvect.begin(), opPlus));
+    }
+}
+BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryFloat_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
+
+// double: simdpp::transform vs. std::transform over (m_inputvect, m_inputvect2) -> m_outputvect
+BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryDouble_SIMD_Test, double)(benchmark::State& st)
+{
+    // All three vectors were sized by the fixture's SetUp() from st.range(0).
+    while (st.KeepRunning())
+    {
+        benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_inputvect2.data(), m_outputvect.data(), opPlus));
+    }
+}
+BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryDouble_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
+
+BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryDouble_STD_Test, double)(benchmark::State& st)
+{
+    // Second operand is m_inputvect2, the same pair of ranges the SIMD benchmark adds.
+    while (st.KeepRunning())
+    {
+        benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_inputvect2.begin(), m_outputvect.begin(), opPlus));
+    }
+}
+BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryDouble_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
+
+} // namespace
\ No newline at end of file
diff --git a/bench/insn/algorithm/transform_unary.cc b/bench/insn/algorithm/transform_unary.cc
new file mode 100644
index 00000000..b0508f09
--- /dev/null
+++ b/bench/insn/algorithm/transform_unary.cc
@@ -0,0 +1,205 @@
+/* Copyright (C) 2018 Povilas Kanapickas
+   Copyright (C) 2018 Thomas Retornaz
+
+   Distributed under the Boost Software License, Version 1.0.
+   (See accompanying file LICENSE_1_0.txt or copy at
+   http://www.boost.org/LICENSE_1_0.txt)
+*/
+
+#include "benchmark/benchmark.h"
+#include <algorithm>
+#include <numeric>
+#include <vector>
+#include <simdpp/simd.h>
+// Algorithm under test. NOTE(review): include targets and template argument lists in this file are reconstructed from usage - confirm against upstream.
+#include <simdpp/algorithm/transform.h>
+
+// Benchmarks the one-input simdpp::transform against the one-input std::transform, both using UnaryOpAddValue.
+namespace {
+// Functor that adds a stored value: `m_val + a` for operands of type T, simdpp::add(m_val, a) for any other operand type U.
+template<typename T>
+struct UnaryOpAddValue
+{
+    T m_val;
+public:
+    UnaryOpAddValue(T val) :m_val(val) {}
+    SIMDPP_INL T operator()(T const &a) const SIMDPP_NOEXCEPT
+    {
+        return m_val + a;
+    }
+
+    template<typename U>
+    SIMDPP_INL U operator()(U const &a) const SIMDPP_NOEXCEPT
+    {
+        return simdpp::add(m_val,a);
+    }
+};
+// Nullary generator that hands back the same stored value on every call.
+template<typename T>
+struct GeneratorConstant
+{
+    explicit GeneratorConstant(T constant) : m_constant(constant) {}
+    T operator()() { return m_constant; }
+    T m_constant;
+};
+
+// Returns a vector of `size` elements, each produced by one call to `gen()`.
+template<typename T, typename Generator>
+std::vector<T, simdpp::aligned_allocator<T, simdpp::simd_traits<T>::alignment>> DataGenerator(std::size_t size, Generator gen)
+{
+    // Same vector type the fixture below stores the result in.
+    using vector_aligned_t = std::vector<T, simdpp::aligned_allocator<T, simdpp::simd_traits<T>::alignment>>;
+    vector_aligned_t input(size);
+    std::generate(input.begin(), input.end(), gen);
+    return input;
+}
+
+/*********************UNARY****************************/
+// Fixture: SetUp() fills the input with st.range(0) copies of 42 and sizes the output to match; TearDown() clears both.
+template<typename T>
+class TransformUnaryFixture : public ::benchmark::Fixture {
+public:
+    void SetUp(const ::benchmark::State& st)
+    {
+        m_inputvect = DataGenerator<T, GeneratorConstant<T>>(static_cast<std::size_t>(st.range(0)), GeneratorConstant<T>(42));
+        m_outputvect.resize(static_cast<std::size_t>(st.range(0)));
+    }
+    void TearDown(const ::benchmark::State&)
+    {
+        m_inputvect.clear();
+        m_outputvect.clear();
+    }
+    using vector_aligned_t = std::vector<T, simdpp::aligned_allocator<T, simdpp::simd_traits<T>::alignment>>;
+    vector_aligned_t m_inputvect;
+    vector_aligned_t m_outputvect;
+    UnaryOpAddValue<T> opPlusOne = UnaryOpAddValue<T>(1);
+};
+
+// uint8_t: simdpp::transform over m_inputvect -> m_outputvect, adding 1 to each element
+BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryUNINT8_SIMD_Test, uint8_t)(benchmark::State& st)
+{
+    // Both vectors were sized by SetUp() from st.range(0).
+    while (st.KeepRunning())
+    {
+        benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_outputvect.data(), opPlusOne));
+    }
+}
+BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryUNINT8_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
+
+BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryUNINT8_STD_Test, uint8_t)(benchmark::State& st)
+{
+    // Both vectors were sized by the fixture's SetUp() from st.range(0).
+    while (st.KeepRunning())
+    {
+        benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_outputvect.begin(), opPlusOne));
+    }
+}
+BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryUNINT8_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
+
+// uint16_t: simdpp::transform vs. std::transform over m_inputvect -> m_outputvect with opPlusOne
+BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryUNINT16_SIMD_Test, uint16_t)(benchmark::State& st)
+{
+    // Both vectors were sized by the fixture's SetUp() from st.range(0).
+    while (st.KeepRunning())
+    {
+        benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_outputvect.data(), opPlusOne));
+    }
+}
+BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryUNINT16_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
+
+BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryUNINT16_STD_Test, uint16_t)(benchmark::State& st)
+{
+    // Both vectors were sized by the fixture's SetUp() from st.range(0).
+    while (st.KeepRunning())
+    {
+        benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_outputvect.begin(), opPlusOne));
+    }
+}
+BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryUNINT16_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
+
+// uint32_t: simdpp::transform vs. std::transform over m_inputvect -> m_outputvect with opPlusOne
+BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryUNINT32_SIMD_Test, uint32_t)(benchmark::State& st)
+{
+    // Both vectors were sized by the fixture's SetUp() from st.range(0).
+    while (st.KeepRunning())
+    {
+        benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_outputvect.data(), opPlusOne));
+    }
+}
+BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryUNINT32_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
+
+BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryUNINT32_STD_Test, uint32_t)(benchmark::State& st)
+{
+    // Registered below with the same sizes as the SIMD benchmark so the two result rows line up.
+    while (st.KeepRunning())
+    {
+        benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_outputvect.begin(), opPlusOne));
+    }
+}
+BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryUNINT32_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
+
+// uint64_t: simdpp::transform vs. std::transform over m_inputvect -> m_outputvect with opPlusOne
+BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryUNINT64_SIMD_Test, uint64_t)(benchmark::State& st)
+{
+    // Both vectors were sized by the fixture's SetUp() from st.range(0).
+    while (st.KeepRunning())
+    {
+        benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_outputvect.data(), opPlusOne));
+    }
+}
+BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryUNINT64_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
+
+BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryUNINT64_STD_Test, uint64_t)(benchmark::State& st)
+{
+    // Both vectors were sized by the fixture's SetUp() from st.range(0).
+    while (st.KeepRunning())
+    {
+        benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_outputvect.begin(), opPlusOne));
+    }
+}
+BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryUNINT64_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
+
+// float: simdpp::transform vs. std::transform over m_inputvect -> m_outputvect with opPlusOne
+BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryFloat_SIMD_Test, float)(benchmark::State& st)
+{
+    // Both vectors were sized by the fixture's SetUp() from st.range(0).
+    while (st.KeepRunning())
+    {
+        benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_outputvect.data(), opPlusOne));
+    }
+}
+BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryFloat_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
+
+
+BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryFloat_STD_Test, float)(benchmark::State& st)
+{
+    // Both vectors were sized by the fixture's SetUp() from st.range(0).
+    while (st.KeepRunning())
+    {
+        benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_outputvect.begin(), opPlusOne));
+    }
+}
+BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryFloat_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
+
+// double: simdpp::transform vs. std::transform over m_inputvect -> m_outputvect with opPlusOne
+BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryDouble_SIMD_Test, double)(benchmark::State& st)
+{
+    // Both vectors were sized by the fixture's SetUp() from st.range(0).
+    while (st.KeepRunning())
+    {
+        benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_outputvect.data(), opPlusOne));
+    }
+}
+BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryDouble_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
+
+BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryDouble_STD_Test, double)(benchmark::State& st)
+{
+    // Both vectors were sized by the fixture's SetUp() from st.range(0).
+    while (st.KeepRunning())
+    {
+        benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_outputvect.begin(), opPlusOne));
+    }
+}
+BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryDouble_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
+
+} // namespace
\ No newline at end of file
diff --git a/bench/insn/load_store.cc b/bench/insn/load_store.cc
new file mode 100644
index 00000000..5a245dd7
--- /dev/null
+++ b/bench/insn/load_store.cc
@@ -0,0 +1,264 @@
+/* Copyright (C) 2018 Povilas Kanapickas
+   Copyright (C) 2018 Thomas Retornaz
+
+   Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + + +#include "benchmark/benchmark.h" +#include +#include +#include +#include + +namespace { + + template + class LoadStoreFixture : public ::benchmark::Fixture { + public: + void SetUp(const ::benchmark::State& st) + { + m_inputvect.resize((size_t)st.range(0)); + std::fill(m_inputvect.begin(), m_inputvect.end(),(T)42); + m_outputvect.resize((size_t)st.range(0)); + } + void TearDown(const ::benchmark::State&) + { + m_inputvect.clear(); + m_outputvect.clear(); + } + using vector_aligned_t = std::vector::alignment>>; + vector_aligned_t m_inputvect; + vector_aligned_t m_outputvect; + }; + + //UINT8_T + BENCHMARK_TEMPLATE_DEFINE_F(LoadStoreFixture, UnaryUNINT8_SIMD_Test, uint8_t)(benchmark::State& st) + { + const auto size = (size_t)st.range(0); + using simd_type_T = typename simdpp::simd_traits::simd_type; + const auto simd_size = simd_type_T::base_length; + while (st.KeepRunning()) + { + const auto* ptrin = m_inputvect.data(); + auto* ptrout = m_outputvect.data(); + for (size_t i = 0; i < size; i += simd_size) + { + simd_type_T element = simdpp::load(ptrin); + simdpp::store(ptrout, element); + ptrin += simd_size; + ptrout += simd_size; + } + } + } + BENCHMARK_REGISTER_F(LoadStoreFixture, UnaryUNINT8_SIMD_Test)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024); + + BENCHMARK_TEMPLATE_DEFINE_F(LoadStoreFixture, UnaryUNINT8_STD_Test, uint8_t)(benchmark::State& st) + { + const auto size = (size_t)st.range(0); + using simd_type_T = typename simdpp::simd_traits::simd_type; + const auto simd_size = simd_type_T::base_length; + while (st.KeepRunning()) + { + const auto* ptrin = m_inputvect.data(); + auto* ptrout = m_outputvect.data(); + for (size_t i = 0; i < size; ++i) + { + *ptrout++=*ptrin++; + } + } + } + BENCHMARK_REGISTER_F(LoadStoreFixture, UnaryUNINT8_STD_Test)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024); + + //UINT16 + BENCHMARK_TEMPLATE_DEFINE_F(LoadStoreFixture, 
UnaryUNINT16_SIMD_Test, uint16_t)(benchmark::State& st) + { + const auto size = (size_t)st.range(0); + using simd_type_T = typename simdpp::simd_traits::simd_type; + const auto simd_size = simd_type_T::base_length; + while (st.KeepRunning()) + { + const auto* ptrin = m_inputvect.data(); + auto* ptrout = m_outputvect.data(); + for (size_t i = 0; i < size; i += simd_size) + { + simd_type_T element = simdpp::load(ptrin); + simdpp::store(ptrout, element); + ptrin += simd_size; + ptrout += simd_size; + } + } + } + BENCHMARK_REGISTER_F(LoadStoreFixture, UnaryUNINT16_SIMD_Test)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024); + + BENCHMARK_TEMPLATE_DEFINE_F(LoadStoreFixture, UnaryUNINT16_STD_Test, uint16_t)(benchmark::State& st) + { + const auto size = (size_t)st.range(0); + using simd_type_T = typename simdpp::simd_traits::simd_type; + const auto simd_size = simd_type_T::base_length; + while (st.KeepRunning()) + { + const auto* ptrin = m_inputvect.data(); + auto* ptrout = m_outputvect.data(); + for (size_t i = 0; i < size; ++i) + { + *ptrout++ = *ptrin++; + } + } + } + BENCHMARK_REGISTER_F(LoadStoreFixture, UnaryUNINT16_STD_Test)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024); + + //UINT32 + BENCHMARK_TEMPLATE_DEFINE_F(LoadStoreFixture, UnaryUNINT32_SIMD_Test, uint32_t)(benchmark::State& st) + { + const auto size = (size_t)st.range(0); + using simd_type_T = typename simdpp::simd_traits::simd_type; + const auto simd_size = simd_type_T::base_length; + while (st.KeepRunning()) + { + const auto* ptrin = m_inputvect.data(); + auto* ptrout = m_outputvect.data(); + for (size_t i = 0; i < size; i += simd_size) + { + simd_type_T element = simdpp::load(ptrin); + simdpp::store(ptrout, element); + ptrin += simd_size; + ptrout += simd_size; + } + } + } + BENCHMARK_REGISTER_F(LoadStoreFixture, UnaryUNINT32_SIMD_Test)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024); + + BENCHMARK_TEMPLATE_DEFINE_F(LoadStoreFixture, UnaryUNINT32_STD_Test, uint32_t)(benchmark::State& st) + { + 
const auto size = (size_t)st.range(0); + using simd_type_T = typename simdpp::simd_traits::simd_type; + const auto simd_size = simd_type_T::base_length; + while (st.KeepRunning()) + { + const auto* ptrin = m_inputvect.data(); + auto* ptrout = m_outputvect.data(); + for (size_t i = 0; i < size; ++i) + { + *ptrout++ = *ptrin++; + } + } + } + BENCHMARK_REGISTER_F(LoadStoreFixture, UnaryUNINT32_STD_Test)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024); + + //UINT64 + BENCHMARK_TEMPLATE_DEFINE_F(LoadStoreFixture, UnaryUNINT64_SIMD_Test, uint64_t)(benchmark::State& st) + { + const auto size = (size_t)st.range(0); + using simd_type_T = typename simdpp::simd_traits::simd_type; + const auto simd_size = simd_type_T::base_length; + while (st.KeepRunning()) + { + const auto* ptrin = m_inputvect.data(); + auto* ptrout = m_outputvect.data(); + for (size_t i = 0; i < size; i += simd_size) + { + simd_type_T element = simdpp::load(ptrin); + simdpp::store(ptrout, element); + ptrin += simd_size; + ptrout += simd_size; + } + } + } + BENCHMARK_REGISTER_F(LoadStoreFixture, UnaryUNINT64_SIMD_Test)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024); + + BENCHMARK_TEMPLATE_DEFINE_F(LoadStoreFixture, UnaryUNINT64_STD_Test, uint64_t)(benchmark::State& st) + { + const auto size = (size_t)st.range(0); + using simd_type_T = typename simdpp::simd_traits::simd_type; + const auto simd_size = simd_type_T::base_length; + while (st.KeepRunning()) + { + const auto* ptrin = m_inputvect.data(); + auto* ptrout = m_outputvect.data(); + for (size_t i = 0; i < size; ++i) + { + *ptrout++ = *ptrin++; + } + } + } + BENCHMARK_REGISTER_F(LoadStoreFixture, UnaryUNINT64_STD_Test)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024); + + //FLOAT + BENCHMARK_TEMPLATE_DEFINE_F(LoadStoreFixture, UnaryFloat_SIMD_Test, float)(benchmark::State& st) + { + const auto size = (size_t)st.range(0); + using simd_type_T = typename simdpp::simd_traits::simd_type; + const auto simd_size = simd_type_T::base_length; + while 
(st.KeepRunning()) + { + const auto* ptrin = m_inputvect.data(); + auto* ptrout = m_outputvect.data(); + for (size_t i = 0; i < size; i += simd_size) + { + simd_type_T element = simdpp::load(ptrin); + simdpp::store(ptrout, element); + ptrin += simd_size; + ptrout += simd_size; + } + } + } + BENCHMARK_REGISTER_F(LoadStoreFixture, UnaryFloat_SIMD_Test)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024); + + BENCHMARK_TEMPLATE_DEFINE_F(LoadStoreFixture, UnaryFloat_STD_Test, float)(benchmark::State& st) + { + const auto size = (size_t)st.range(0); + using simd_type_T = typename simdpp::simd_traits::simd_type; + const auto simd_size = simd_type_T::base_length; + while (st.KeepRunning()) + { + const auto* ptrin = m_inputvect.data(); + auto* ptrout = m_outputvect.data(); + for (size_t i = 0; i < size; ++i) + { + *ptrout++ = *ptrin++; + } + } + } + BENCHMARK_REGISTER_F(LoadStoreFixture, UnaryFloat_STD_Test)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024); + + //DOUBLE + BENCHMARK_TEMPLATE_DEFINE_F(LoadStoreFixture, UnaryDouble_SIMD_Test, double)(benchmark::State& st) + { + const auto size = (size_t)st.range(0); + using simd_type_T = typename simdpp::simd_traits::simd_type; + const auto simd_size = simd_type_T::base_length; + while (st.KeepRunning()) + { + const auto* ptrin = m_inputvect.data(); + auto* ptrout = m_outputvect.data(); + for (size_t i = 0; i < size; i += simd_size) + { + simd_type_T element = simdpp::load(ptrin); + simdpp::store(ptrout, element); + ptrin += simd_size; + ptrout += simd_size; + } + } + } + BENCHMARK_REGISTER_F(LoadStoreFixture, UnaryDouble_SIMD_Test)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024); + + BENCHMARK_TEMPLATE_DEFINE_F(LoadStoreFixture, UnaryDouble_STD_Test, double)(benchmark::State& st) + { + const auto size = (size_t)st.range(0); + using simd_type_T = typename simdpp::simd_traits::simd_type; + const auto simd_size = simd_type_T::base_length; + while (st.KeepRunning()) + { + const auto* ptrin = m_inputvect.data(); + auto* ptrout = 
m_outputvect.data(); + for (size_t i = 0; i < size; ++i) + { + *ptrout++ = *ptrin++; + } + } + } + BENCHMARK_REGISTER_F(LoadStoreFixture, UnaryDouble_STD_Test)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024); +} // namespace diff --git a/bench/insn/main.cc b/bench/insn/main.cc new file mode 100644 index 00000000..d50df491 --- /dev/null +++ b/bench/insn/main.cc @@ -0,0 +1,13 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + + +#include "main.h" +#include "benchmark/benchmark.h" + +BENCHMARK_MAIN(); \ No newline at end of file diff --git a/bench/insn/main.h b/bench/insn/main.h new file mode 100644 index 00000000..75f10597 --- /dev/null +++ b/bench/insn/main.h @@ -0,0 +1,15 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef SIMDPP_MAIN_INSN_BENCH_H +#define SIMDPP_MAIN_INSN_BENCH_H + +#include + +#endif //SIMDPP_MAIN_INSN_BENCH_H + diff --git a/bench/thirdparty/CMakeLists.txt b/bench/thirdparty/CMakeLists.txt new file mode 100644 index 00000000..9166fe08 --- /dev/null +++ b/bench/thirdparty/CMakeLists.txt @@ -0,0 +1,38 @@ +cmake_minimum_required(VERSION 2.8.0) + +#Adapted from https://github.com/dream3d/Dream3DSdkBuild/blob/master/Boost.cmake +message("Create ExternalProject google benchmark has thirdpartie lib") + +include(ExternalProject) +# clone approach +find_package(git QUIET) +ExternalProject_Add(googlebenchmark + GIT_REPOSITORY https://github.com/google/benchmark.git + GIT_TAG master + PREFIX ${CMAKE_CURRENT_BINARY_DIR} + #--Configure step------------- + CMAKE_ARGS + -DBUILD_TESTING:BOOL=OFF + -DBENCHMARK_ENABLE_TESTING:BOOL=OFF + -DBENCHMARK_ENABLE_LTO:BOOL=OFF + -DCMAKE_BUILD_TYPE=Release + #--Build step----------------- + UPDATE_COMMAND "" # Skip annoying updates for every build + #--Install step----------------- + INSTALL_COMMAND "" + ) + +ExternalProject_Get_Property(googlebenchmark TMP_DIR STAMP_DIR DOWNLOAD_DIR SOURCE_DIR BINARY_DIR INSTALL_DIR) +message("Build googlebenchmark src ${SOURCE_DIR} in ${BINARY_DIR}") +set_property(GLOBAL PROPERTY USE_FOLDERS ON) +set(GOOGLE_BENCHMARK_INCLUDE_DIRS "${SOURCE_DIR}/include") +set(GOOGLE_BENCHMARK_INCLUDE_DIRS ${GOOGLE_BENCHMARK_INCLUDE_DIRS} PARENT_SCOPE) +set(GOOGLE_BENCHMARK googlebenchmark PARENT_SCOPE) +set_target_properties (googlebenchmark PROPERTIES FOLDER bench) +add_library(benchmark STATIC IMPORTED GLOBAL) #MARK AS IMPORTED AND GLOBAL SCOPE +set_target_properties (benchmark PROPERTIES FOLDER bench) +if(WIN32) +set_target_properties(benchmark PROPERTIES IMPORTED_LOCATION ${BINARY_DIR}/src/Release/benchmark.lib) +else() +set_target_properties(benchmark PROPERTIES IMPORTED_LOCATION ${BINARY_DIR}/src/libbenchmark.a) 
+endif() diff --git a/doc/wiki/Main_Page.mwiki b/doc/wiki/Main_Page.mwiki index 6df4ee6c..976cdbf2 100644 --- a/doc/wiki/Main_Page.mwiki +++ b/doc/wiki/Main_Page.mwiki @@ -20,7 +20,7 @@ various variants of matrix transpositions, interleaving loads/stores, optimized compile-time shuffling instructions, etc. Each of these are implemented in the most efficient manner for the target instruction set. Finally, it's possible to fall back to native intrinsics when necessary, without compromising -maintanability. +maintainability. The library sits somewhere in the middle between programming directly in @@ -173,6 +173,32 @@ the compiler will generate. {{ltt|misc/prefetch_read}}  {{ltt|misc/prefetch_write}} +'''[[Algorithm | STL like algorithm]]''' +
+{{ltt|algorithm/any_of}}
+{{ltt|algorithm/all_of}}
+{{ltt|algorithm/copy}}
+{{ltt|algorithm/copy_n}}
+{{ltt|algorithm/count}}
+{{ltt|algorithm/count_if}}
+{{ltt|algorithm/equal}}
+{{ltt|algorithm/fill}}
+{{ltt|algorithm/find}}
+{{ltt|algorithm/find_if}}
+{{ltt|algorithm/find_if_not}}
+{{ltt|algorithm/lexicographical_compare}}
+{{ltt|algorithm/max}}
+{{ltt|algorithm/max_element}}
+{{ltt|algorithm/min}}
+{{ltt|algorithm/min_element}}
+{{ltt|algorithm/none_of}}
+{{ltt|algorithm/reduce}}
+{{ltt|algorithm/replace}}
+{{ltt|algorithm/replace_if}}
+{{ltt|algorithm/transform}}
+{{ltt|algorithm/transform_reduce}}
+
+ |- class="row rowbottom" | colspan=4| |} diff --git a/doc/wiki/algorithm/all_of.mwiki b/doc/wiki/algorithm/all_of.mwiki new file mode 100644 index 00000000..f47536a4 --- /dev/null +++ b/doc/wiki/algorithm/all_of.mwiki @@ -0,0 +1,27 @@ +{{simdpp/title|all_of}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | + template + bool all_of(T const* first, T const* last, UnaryPredicate pred); +}} + +{{dcl end}} +{{misc/navbar}} + +{{tt|all_of}} Checks if unary predicate {{tt|pred}} returns true for {{tt|all}} elements in the range {{tt|[first, last)}}. + +===Parameters=== +{{par begin}} +{{par | first, last | the range of elements to examine}} +{{par | pred | unary predicate}} +{{par end}} + +===Return value=== +{{tt|true}} if unary predicate returns {{tt|true}} for all elements in the range, {{tt|false}} otherwise. Returns {{tt|true}} if the range is empty. + +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc none_of}} +{{dsc inc | algorithm/dsc any_of}} +{{dsc end}} diff --git a/doc/wiki/algorithm/any_of.mwiki b/doc/wiki/algorithm/any_of.mwiki new file mode 100644 index 00000000..678837c6 --- /dev/null +++ b/doc/wiki/algorithm/any_of.mwiki @@ -0,0 +1,27 @@ +{{simdpp/title|any_of}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | + template + bool any_of(T const* first, T const* last, UnaryPredicate pred); +}} + +{{dcl end}} +{{misc/navbar}} + +{{tt|any_of}} Checks if unary predicate {{tt|pred}} returns true for {{tt|at least one}} element in the range {{tt|[first, last)}}. + +===Parameters=== +{{par begin}} +{{par | first, last | the range of elements to examine}} +{{par | pred | unary predicate}} +{{par end}} + +===Return value=== +{{tt|true}} if unary predicate returns {{tt|true}} for at least one element in the range, {{tt|false}} otherwise. Returns {{tt|false}} if the range is empty.
+ +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc none_of}} +{{dsc inc | algorithm/dsc all_of}} +{{dsc end}} diff --git a/doc/wiki/algorithm/copy.mwiki b/doc/wiki/algorithm/copy.mwiki new file mode 100644 index 00000000..31c21712 --- /dev/null +++ b/doc/wiki/algorithm/copy.mwiki @@ -0,0 +1,27 @@ +{{simdpp/title|copy}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | + template + T* copy(T const* first, T const* last, T* out) +}} + +{{dcl end}} +{{misc/navbar}} + +{{tt|copy}} Copies the elements in the range, defined by {{tt|[first, last)}}, to another range beginning at {{tt|out}}. + +===Parameters=== +{{par begin}} +{{par | first, last | the range of elements to copy}} +{{par | out | the beginning of the destination range.}} +{{par end}} + +===Return value=== +Output address of the element in the destination range, one past the last element copied. + +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc fill}} +{{dsc inc | algorithm/dsc copy_n}} +{{dsc end}} diff --git a/doc/wiki/algorithm/copy_n.mwiki b/doc/wiki/algorithm/copy_n.mwiki new file mode 100644 index 00000000..ad0e73c1 --- /dev/null +++ b/doc/wiki/algorithm/copy_n.mwiki @@ -0,0 +1,28 @@ +{{simdpp/title|copy_n}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | + template + T* copy_n(T const* first, Size n, T* out) +}} + +{{dcl end}} +{{misc/navbar}} + +{{tt|copy_n}} Copies exactly {{tt|n}} values from the range beginning at {{tt|first}} to the range beginning at {{tt|out}}, if {{tt|n>0}}. Does nothing otherwise. + +===Parameters=== +{{par begin}} +{{par | first | the beginning of the range of elements to copy from}} +{{par | n | number of the elements to copy}} +{{par | out | the beginning of the destination range}} +{{par end}} + +===Return value=== +Output address in the destination range, pointing past the last element copied if {{tt|n>0}} or {{tt|out}} otherwise.
+ +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc fill}} +{{dsc inc | algorithm/dsc copy}} +{{dsc end}} diff --git a/doc/wiki/algorithm/count.mwiki b/doc/wiki/algorithm/count.mwiki new file mode 100644 index 00000000..0e9eb1e9 --- /dev/null +++ b/doc/wiki/algorithm/count.mwiki @@ -0,0 +1,27 @@ +{{simdpp/title|count}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | + template + typename std::iterator_traits::difference_type + count(T const* first, T const* last, U val); +}} + +{{dcl end}} +{{misc/navbar}} + +{{tt|count}} counts the elements that are equal to value in the range defined by {{tt|[first, last)}}. + +===Parameters=== +{{par begin}} +{{par | first, last | the range of elements to examine}} +{{par | val | the value to search for}} +{{par end}} + +===Return value=== +number of elements satisfying the condition. + +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc count_if}} +{{dsc end}} diff --git a/doc/wiki/algorithm/count_if.mwiki b/doc/wiki/algorithm/count_if.mwiki new file mode 100644 index 00000000..592b7032 --- /dev/null +++ b/doc/wiki/algorithm/count_if.mwiki @@ -0,0 +1,28 @@ +{{simdpp/title|count_if}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | + template + typename std::iterator_traits::difference_type + count_if(T const* first, T const* last, UnaryPredicate pred); +}} + +{{dcl end}} +{{misc/navbar}} + +{{tt|count_if}} counts the elements that satisfied the predicate in the range defined by {{tt|[first, last)}}. + +===Parameters=== +{{par begin}} +{{par | first, last | the range of elements to examine}} +{{par | pred | unary predicate which returns {{tt|true}} for the required elements. }} +{{par end}} + +===Return value=== +number of elements satisfying the condition. 
+ + +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc count}} +{{dsc end}} diff --git a/doc/wiki/algorithm/equal.mwiki b/doc/wiki/algorithm/equal.mwiki new file mode 100644 index 00000000..3cee2008 --- /dev/null +++ b/doc/wiki/algorithm/equal.mwiki @@ -0,0 +1,35 @@ +{{simdpp/title|equal}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | num=1 | + template + bool equal(const T* first1, const T* last1, const T* first2); +}} +{{dcl | num=2 | + template + bool equal(const T* first1, const T* last1, const T* first2,BinaryPredicate pred); + +}} +{{dcl end}} +{{misc/navbar}} + +{{tt|equal}} Returns {{c|true}} if the range {{tt|[first1,last1)}} is equal to the range {{tt|[first2, first2 + (last1 - first1))}} according to pred and {{c|false}} otherwise + +@1@ Returns {{c|true}} if the range {{tt|[first1,last1)}} is equal to the range {{tt|[first2, first2 + (last1 - first1))}}, and {{c|false}} otherwise +@2@ The binary operation BinaryPredicate is applied to pairs of elements from two ranges: one defined by [first1, last1) and the other beginning at first2. + +===Parameters=== +{{par begin}} +{{par | first1, last1 | the first range of elements to examine}} +{{par | first2 | the beginning of the second range of elements to examine}} +{{par | pred | binary predicate which returns true if the elements should be treated as equal.}} +{{par end}} + +===Return value=== +If the elements in the two ranges are equal, returns {{c|true}}. Otherwise returns {{c|false}}.
+ +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc find }} +{{dsc inc | algorithm/dsc lexicographical_compare }} +{{dsc end}} diff --git a/doc/wiki/algorithm/fill.mwiki b/doc/wiki/algorithm/fill.mwiki new file mode 100644 index 00000000..46648738 --- /dev/null +++ b/doc/wiki/algorithm/fill.mwiki @@ -0,0 +1,26 @@ +{{simdpp/title|fill}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | + template + void fill(T* first, T* last, U value) +}} + +{{dcl end}} +{{misc/navbar}} + +{{tt|fill}} Assigns the given {{tt|value}} to the elements in the range {{tt|[first, last)}}. + +===Parameters=== +{{par begin}} +{{par | first, last | the range of elements to modify}} +{{par | value | the value to be assigned}} +{{par end}} + +===Return value=== +(none) + +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc copy}} +{{dsc end}} diff --git a/doc/wiki/algorithm/find.mwiki b/doc/wiki/algorithm/find.mwiki new file mode 100644 index 00000000..e7b2489d --- /dev/null +++ b/doc/wiki/algorithm/find.mwiki @@ -0,0 +1,27 @@ +{{simdpp/title|find}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | + template + T const* find(T const* first, T const* last, U val); +}} + +{{dcl end}} +{{misc/navbar}} + +{{tt|find}} Returns the address of the first element in the range {{tt|[first, last)}} that is equal to {{tt|val}}. + +===Parameters=== +{{par begin}} +{{par | first, last | the range of elements to examine}} +{{par | val | value to compare the elements to.}} +{{par end}} + +===Return value=== +Address of the first element satisfying the condition or {{tt|last}} if no such element is found.
+ +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc find_if}} +{{dsc inc | algorithm/dsc find_if_not}} +{{dsc end}} diff --git a/doc/wiki/algorithm/find_if.mwiki b/doc/wiki/algorithm/find_if.mwiki new file mode 100644 index 00000000..39c048ce --- /dev/null +++ b/doc/wiki/algorithm/find_if.mwiki @@ -0,0 +1,27 @@ +{{simdpp/title|find_if}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | + template + T const* find_if(T const* first, T const* last, UnaryPredicate pred); +}} + +{{dcl end}} +{{misc/navbar}} + +{{tt|find_if}} Returns the address of the first element in the range {{tt|[first, last)}} that satisfies the given predicate. + +===Parameters=== +{{par begin}} +{{par | first, last | the range of elements to examine}} +{{par | pred | unary predicate which returns {{tt|true}} for the required element.}} +{{par end}} + +===Return value=== +Address of the first element satisfying the condition or {{tt|last}} if no such element is found. + +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc find}} +{{dsc inc | algorithm/dsc find_if_not}} +{{dsc end}} diff --git a/doc/wiki/algorithm/find_if_not.mwiki b/doc/wiki/algorithm/find_if_not.mwiki new file mode 100644 index 00000000..997808cc --- /dev/null +++ b/doc/wiki/algorithm/find_if_not.mwiki @@ -0,0 +1,27 @@ +{{simdpp/title|find_if_not}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | + template + T const* find_if_not(T const* first, T const* last, UnaryPredicate pred); +}} + +{{dcl end}} +{{misc/navbar}} + +{{tt|find_if_not}} Returns the address of the first element in the range {{tt|[first, last)}} that does ''not'' satisfy the given predicate. + +===Parameters=== +{{par begin}} +{{par | first, last | the range of elements to examine}} +{{par | pred | unary predicate which returns {{tt|false}} for the required element.}} +{{par end}} + +===Return value=== +Address of the first element satisfying the condition or {{tt|last}} if no such element is found.
+ +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc find}} +{{dsc inc | algorithm/dsc find_if}} +{{dsc end}} diff --git a/doc/wiki/algorithm/lexicographical_compare.mwiki b/doc/wiki/algorithm/lexicographical_compare.mwiki new file mode 100644 index 00000000..363f6e7e --- /dev/null +++ b/doc/wiki/algorithm/lexicographical_compare.mwiki @@ -0,0 +1,34 @@ +{{simdpp/title|lexicographical_compare}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | num=1 | + template + bool lexicographical_compare(const T* first1, const T* last1, const T* first2, const T* last2); +}} +{{dcl | num=2 | + + template + bool lexicographical_compare(const T* first1, const T* last1, const T* first2, const T* last2,BinaryPredicate comp); +}} +{{dcl end}} +{{misc/navbar}} + +{{tt|lexicographical_compare}} Checks if the first range {{c|[first1, last1)}} is lexicographically ''less'' than the second range {{c|[first2, last2)}}. + +@1@ Elements are compared using {{tt|operator<}}. +@2@ Elements are compared using the given binary comparison function {{tt|comp}}. + +===Parameters=== +{{par begin}} +{{par | first1, last1 | the first range of elements to examine}} +{{par | first2, last2 | the second range of elements to examine}} +{{par | comp | comparison operator}} +{{par end}} + +===Return value=== +{{c|true}} if the first range is lexicographically ''less'' than the second.
+ +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc equal }} +{{dsc end}} diff --git a/doc/wiki/algorithm/max.mwiki b/doc/wiki/algorithm/max.mwiki new file mode 100644 index 00000000..dfd46ebf --- /dev/null +++ b/doc/wiki/algorithm/max.mwiki @@ -0,0 +1,31 @@ +{{simdpp/title|max}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | num=1 | +template + T max(T const* first, T const* last, Comp comp); +}} +{{dcl | num=2 | +template + T max(T const* first, T const* last); +}} +{{dcl end}} +{{misc/navbar}} + +{{tt|max}} Returns the value of the element with the largest value in the range[first, last[ over comp (If no comp return simply the max).The lowest possible value for the order if the range is empty. + +===Parameters=== +{{par begin}} +{{par | first, last | the range of processed elements}} +{{par | comp | binary comparison operator}} +{{par end}} + +===Return value=== +(max) + +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc min}} +{{dsc inc | algorithm/dsc max_element}} +{{dsc inc | algorithm/dsc min_element}} +{{dsc end}} diff --git a/doc/wiki/algorithm/max_element.mwiki b/doc/wiki/algorithm/max_element.mwiki new file mode 100644 index 00000000..3113fccd --- /dev/null +++ b/doc/wiki/algorithm/max_element.mwiki @@ -0,0 +1,31 @@ +{{simdpp/title|max_element}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | num=1 | + template + T const * max_element(T const* first, T const* last); +}} +{{dcl | num=2 | + template + T const * max_element(T const* first, T const* last, Compare comp); +}} +{{dcl end}} +{{misc/navbar}} + +{{tt|max_element}} Returns the address of the element with the largest value in the range[first, last[ over comp (If no comp return simply the address of the max). 
+ +===Parameters=== +{{par begin}} +{{par | first, last | the range of processed elements}} +{{par | comp | binary comparison operator}} +{{par end}} + +===Return value=== +(address of the max) + +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc max}} +{{dsc inc | algorithm/dsc min_element}} +{{dsc inc | algorithm/dsc min}} +{{dsc end}} diff --git a/doc/wiki/algorithm/min.mwiki b/doc/wiki/algorithm/min.mwiki new file mode 100644 index 00000000..40db99c3 --- /dev/null +++ b/doc/wiki/algorithm/min.mwiki @@ -0,0 +1,31 @@ +{{simdpp/title|min}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | num=1 | +template + T min(T const* first, T const* last, Comp comp); +}} +{{dcl | num=2 | +template + T min(T const* first, T const* last); +}} +{{dcl end}} +{{misc/navbar}} + +{{tt|min}} Returns the value of the element with the lowest value in the range {{tt|[first, last)}} according to {{tt|comp}} (if no {{tt|comp}} is given, simply returns the min). Returns the greatest possible value for the order if the range is empty. + +===Parameters=== +{{par begin}} +{{par | first, last | the range of processed elements}} +{{par | comp | binary comparison operator}} +{{par end}} + +===Return value=== +(min) + +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc max}} +{{dsc inc | algorithm/dsc max_element}} +{{dsc inc | algorithm/dsc min_element}} +{{dsc end}} \ No newline at end of file diff --git a/doc/wiki/algorithm/min_element.mwiki b/doc/wiki/algorithm/min_element.mwiki new file mode 100644 index 00000000..d72fa233 --- /dev/null +++ b/doc/wiki/algorithm/min_element.mwiki @@ -0,0 +1,31 @@ +{{simdpp/title|min_element}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | num=1 | + template + T const * min_element(T const* first, T const* last); +}} +{{dcl | num=2 | + template + T const * min_element(T const* first, T const* last, Compare comp); +}} +{{dcl end}} +{{misc/navbar}} + +{{tt|min_element}} Returns the address of the element with the lowest value in the range {{tt|[first, last)}} according to {{tt|comp}} (if no {{tt|comp}} is given, simply returns the address of the min).
+ +===Parameters=== +{{par begin}} +{{par | first, last | the range of processed elements}} +{{par | comp | binary comparison operator}} +{{par end}} + +===Return value=== +(address of the min) + +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc max}} +{{dsc inc | algorithm/dsc max_element}} +{{dsc inc | algorithm/dsc min}} +{{dsc end}} diff --git a/doc/wiki/algorithm/none_of.mwiki b/doc/wiki/algorithm/none_of.mwiki new file mode 100644 index 00000000..f49df565 --- /dev/null +++ b/doc/wiki/algorithm/none_of.mwiki @@ -0,0 +1,27 @@ +{{simdpp/title|none_of}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | + template + bool none_of(T const* first, T const* last, UnaryPredicate pred); +}} + +{{dcl end}} +{{misc/navbar}} + +{{tt|none_of}} Checks if unary predicate {{tt|pred}} returns true for {{tt|no}} elements in the range {{tt|[first, last)}}. + +===Parameters=== +{{par begin}} +{{par | first, last | the range of elements to examine}} +{{par | pred | unary predicate}} +{{par end}} + +===Return value=== +{{tt|true}} if unary predicate returns {{tt|true}} for no elements in the range, {{tt|false}} otherwise. Returns {{tt|true}} if the range is empty. + +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc all_of}} +{{dsc inc | algorithm/dsc any_of}} +{{dsc end}} diff --git a/doc/wiki/algorithm/reduce.mwiki b/doc/wiki/algorithm/reduce.mwiki new file mode 100644 index 00000000..2e35a049 --- /dev/null +++ b/doc/wiki/algorithm/reduce.mwiki @@ -0,0 +1,38 @@ +{{simdpp/title|reduce}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | num=1 | +template + T reduce(T const* first, T const* last, T init); +}} +{{dcl | num=2 | +template + T reduce(T const* first, T const* last, T init, T neutral, BinOp f); +}} +{{dcl end}} +{{misc/navbar}} + +@1@ Computes the sum over elements in the given Range [first,last) and the initial value init. +@2@ Reduces the range [first,last), possibly permuted and aggregated in unspecified manner, along with the initial value init over binary_op.
+ +===Notes=== +The behavior is non-deterministic if binary_op is not associative or not commutative. + +If the range is empty, {{tt|init}} is returned, unmodified + +===Parameters=== +{{par begin}} +{{par | first, last | the range of elements to apply the algorithm to}} +{{par | init | the initial value of the generalized sum}} +{{par | binary_op | binary FunctionObject that will be applied in unspecified order}} +{{par | neutral | Value containing the neutral element of BinOp}} +{{par end}} + +===Return value=== +Generalized sum of {{tt|init}} and {{tt|*first}}, {{tt|*(first+1)}}, ... {{tt|*(last-1)}} over {{tt|binary_op}}, +in other words, {{tt|reduce}} behaves like {{lc|accumulate}} except the elements of the range may be grouped and rearranged in arbitrary order + +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc transform }} +{{dsc end}} diff --git a/doc/wiki/algorithm/replace.mwiki b/doc/wiki/algorithm/replace.mwiki new file mode 100644 index 00000000..412a9e4b --- /dev/null +++ b/doc/wiki/algorithm/replace.mwiki @@ -0,0 +1,27 @@ +{{simdpp/title|replace}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | + template + void replace(T * first, T * last, T const & old_val, T const & new_val); +}} + +{{dcl end}} +{{misc/navbar}} + +{{tt|replace}} Replaces all elements that are equal to old_val by new_val in the range defined by {{tt|[first, last)}}. 
+ +===Parameters=== +{{par begin}} +{{par | first, last | the range of elements to examine}} +{{par | old_val | the value of elements to replace}} +{{par | new_val | the value to use as replacement}} +{{par end}} + +===Return value=== +(None) + +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc replace_if}} +{{dsc end}} diff --git a/doc/wiki/algorithm/replace_if.mwiki b/doc/wiki/algorithm/replace_if.mwiki new file mode 100644 index 00000000..90c76fa9 --- /dev/null +++ b/doc/wiki/algorithm/replace_if.mwiki @@ -0,0 +1,27 @@ +{{simdpp/title|replace_if}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | + template + void replace_if(T* first, T* last, UnaryPredicate pred , const T& new_val); +}} + +{{dcl end}} +{{misc/navbar}} + +{{tt|replace_if}} Replaces all elements for which predicate {{tt|pred}} returns {{tt|true}} with {{tt|new_val}} in the range defined by {{tt|[first, last)}}. + +===Parameters=== +{{par begin}} +{{par | first, last | the range of elements to examine}} +{{par | new_val | the value to use as replacement}} +{{par | pred | unary predicate which returns true if the element value should be replaced.}} +{{par end}} + +===Return value=== +(None) + +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc replace}} +{{dsc end}} diff --git a/doc/wiki/algorithm/transform.mwiki b/doc/wiki/algorithm/transform.mwiki new file mode 100644 index 00000000..c6a82431 --- /dev/null +++ b/doc/wiki/algorithm/transform.mwiki @@ -0,0 +1,56 @@ +{{simdpp/title|transform}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | num=1 | +template + U* transform(T const* first1, T const* last1, U* out, UnOp f); +}} +{{dcl | num=2 | +template + U* transform(T1 const* first1, T1 const* last1, T2 const* first2, U* out, BinOp f); +}} +{{dcl end}} +{{misc/navbar}} + +{{tt|transform}} applies the given function to a range and stores the result in another range, beginning at out. + +@1@ The unary operation unary_op is applied to the range defined by [first1, last1).
+@2@ The binary operation binary_op is applied to pairs of elements from two ranges: one defined by [first1, last1) and the other beginning at first2. + +===Parameters=== +{{par begin}} +{{par | first1, last1 | the first range of elements to transform}} +{{par | first2 | the beginning of the second range of elements to transform}} +{{par | out | the beginning of the destination range, may be equal to first1 or first2}} +{{par | UnOp | unary operation function object that will be applied.}} +{{par | BinOp | binary operation function object that will be applied.}} +{{par end}} + +===Return value=== +(out) + +===Notes=== +Notes +transform does not guarantee in-order application of unary_op or binary_op. +To apply a function to a sequence in-order or to apply a function that modifies the elements of a sequence, use for_each + +===Equivalent operation=== +{{source|1= +while (first1 != last1) { + *out++ = UnOp(*first1++); + } + return out; +}} + +{{source|2= +while (first1 != last1) { + *out++ = BinOp(*first1++, *first2++); + } + return out; +}} + +===See also=== +{{dsc begin}} +{{dsc inc | misc/dsc for_each }} +{{dsc inc | algorithm/dsc reduce }} +{{dsc end}} diff --git a/doc/wiki/algorithm/transform_reduce.mwiki b/doc/wiki/algorithm/transform_reduce.mwiki new file mode 100644 index 00000000..f827f171 --- /dev/null +++ b/doc/wiki/algorithm/transform_reduce.mwiki @@ -0,0 +1,42 @@ +{{simdpp/title|transform_reduce}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | num=1 | + template + T transform_reduce( const T* first, const T* last, T init, BinaryOp binary_op,UnaryOp unary_op); +}} +{{dcl | num=2 | + template + U transform_reduce(const T1* first1, const T1* last1, const T2* first2, U init, BinaryOp1 binary_op1, BinaryOp2 binary_op2); + +}} +{{dcl end}} +{{misc/navbar}} + +@1@ Applies {{tt|unary_op}} to each element in the range {{math|[first; last)}} and reduces the results (possibly permuted and aggregated in unspecified manner) along with the initial value {{tt|init}} over 
{{tt|binary_op}}. + +@2@ Applies {{tt|binary_op2}} to each pair of elements from the ranges {{c|[first; last)}} and the range starting at {{c|first2}} and reduces the results (possibly permuted and aggregated in unspecified manner) along with the initial value {{tt|init}} over {{tt|binary_op1}} + +===Notes=== +The behavior is non-deterministic if {{tt|binary_op}}/{{tt|binary_op2}} is not associative or not commutative. + +The behavior is undefined if {{tt|unary_op}}, {{tt|binary_op}}, {{tt|binary_op1}}, or {{tt|binary_op2}} modifies any element + +===Parameters=== +{{par begin}} +{{par | first1, last1 | the first range of elements to transform}} +{{par | first2 | the beginning of the second range of elements to transform}} +{{par | init | the initial value of the generalized sum}} +{{par | unary_op | unary {{concept|FunctionObject}} that will be applied to each element of the input range. The return type must be acceptable as input to {{tt|binary_op}} }} +{{par | binary_op | binary {{concept|FunctionObject}} that will be applied in unspecified order to the results of {{tt|unary_op}}, the results of other {{tt|binary_op}} and {{tt|init}}.}} +{{par end}} + +===Return value=== +@1@ Generalized sum of {{tt|init}} and {{tt|unary_op(*first)}}, {{tt|unary_op(*(first+1))}}, ... {{tt|unary_op(*(last-1))}} over {{tt|binary_op}}, +@2@ Generalized sum of {{tt|init}} and {{tt|binary_op2(*first,*first2)}}, {{tt|binary_op2(*(first+1),*(first2+1))}}, ..., over {{tt|binary_op1}} + +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc transform }} +{{dsc inc | algorithm/dsc reduce }} +{{dsc end}} diff --git a/simdpp/algorithm/all_of.h b/simdpp/algorithm/all_of.h new file mode 100644 index 00000000..4bcb9d42 --- /dev/null +++ b/simdpp/algorithm/all_of.h @@ -0,0 +1,76 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_ALL_OF_H +#define LIBSIMDPP_SIMDPP_ALGORITHM_ALL_OF_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +#include +#include +#include + +namespace simdpp { +namespace SIMDPP_ARCH_NAMESPACE { + +template +bool all_of(T const* first, T const* last, UnaryPredicate pred) +{ +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first) + throw std::runtime_error("all_of - null ptr first."); + if (!last) + throw std::runtime_error("all_of - null ptr last."); +#endif + + using simd_type_T = typename simd_traits::simd_type; + using simd_mask_T = typename simd_traits::simd_mask_type; + + //define loopcounter + const auto simd_size = simd_type_T::base_length; + + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + //prologue + auto lastprologue = first + size_prologue_loop; + if (!std::all_of(first, lastprologue, pred)) + { + return false; + } + + //simd loop + auto i = size_prologue_loop; + //workaraund not reduce_and for mask type + const simd_type_T on = splat(T(1)); + const simd_type_T off = splat(T(0)); + for (; i < size_simd_loop; i += simd_size) + { + simd_mask_T mask = pred(load(lastprologue)); + const auto res = blend(on, off, mask); + + if (!reduce_and(res)) + { + return false; + } + lastprologue += simd_size; + } + if (!std::all_of(lastprologue,last, pred)) + { + return false; + } + return true; +} +} // namespace SIMDPP_ARCH_NAMESPACE +} // namespace simdpp + +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_ALL_OF_H diff --git a/simdpp/algorithm/any_of.h b/simdpp/algorithm/any_of.h new file mode 100644 index 00000000..4874beea --- /dev/null +++ b/simdpp/algorithm/any_of.h @@ -0,0 +1,72 @@ +/* Copyright (C) 2018 Povilas Kanapickas + 
Copyright (C) 2018 Thomas Retornaz + + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_ANY_OF_H +#define LIBSIMDPP_SIMDPP_ALGORITHM_ANY_OF_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +#include +#include +#include + +namespace simdpp { +namespace SIMDPP_ARCH_NAMESPACE { + +template +bool any_of(T const* first, T const* last, UnaryPredicate pred) +{ +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first) + throw std::runtime_error("any_of - null ptr first."); + if (!last) + throw std::runtime_error("any_of - null ptr last."); +#endif + + using simd_type_T = typename simd_traits::simd_type; + using simd_mask_T = typename simd_traits::simd_mask_type; + + //define loopcounter + const auto simd_size = simd_type_T::base_length; + + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + //prologue + auto lastprologue = first + size_prologue_loop; + if (std::any_of(first, lastprologue, pred)) + { + return true; + } + + //simd loop + auto i = size_prologue_loop; + //workaraund not test_bits_any for mask type + const simd_type_T on = splat(T(1)); //TODO factorize + const simd_type_T off = splat(T(0)); + for (; i < size_simd_loop; i += simd_size) + { + simd_mask_T mask = pred(load(lastprologue)); //TODO factorize + const auto res = blend(on, off, mask); + if (test_bits_any(res)) + { + return true; + } + lastprologue += simd_size; + } + return std::any_of(lastprologue,last, pred); +} + +} // namespace SIMDPP_ARCH_NAMESPACE +} // namespace simdpp + +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_ANY_OF_H diff --git a/simdpp/algorithm/copy.h b/simdpp/algorithm/copy.h new file mode 100644 index 00000000..9a2a3385 --- /dev/null +++ 
b/simdpp/algorithm/copy.h @@ -0,0 +1,44 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + +Distributed under the Boost Software License, Version 1.0. +(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_COPY_H +#define LIBSIMDPP_SIMDPP_ALGORITHM_COPY_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +#include + +namespace simdpp { +namespace SIMDPP_ARCH_NAMESPACE { + +template +T* copy(T const* first, T const* last, T* out) +{ +struct UnaryOpCopy +{ + using simd_type_T = typename simd_traits::simd_type; + SIMDPP_INL T operator()(T const &a) const SIMDPP_NOEXCEPT + { + return a; + } + + SIMDPP_INL simd_type_T operator()(simd_type_T const &a) const SIMDPP_NOEXCEPT + { + return a; + } +}; + +return transform(first, last, out, UnaryOpCopy{}); +} + +} // namespace SIMDPP_ARCH_NAMESPACE +} // namespace simdpp + +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_COPY_H diff --git a/simdpp/algorithm/copy_n.h b/simdpp/algorithm/copy_n.h new file mode 100644 index 00000000..c6165d10 --- /dev/null +++ b/simdpp/algorithm/copy_n.h @@ -0,0 +1,30 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + +Distributed under the Boost Software License, Version 1.0. 
+(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_COPY_N_H +#define LIBSIMDPP_SIMDPP_ALGORITHM_COPY_N_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +#include + +namespace simdpp { +namespace SIMDPP_ARCH_NAMESPACE { + +template T* copy_n(T const* first, Size n, T* out) +{ + if (n <= Size(0)) return out; + return copy(first, first + n, out); +} + +} // namespace SIMDPP_ARCH_NAMESPACE +} // namespace simdpp + +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_COPY_N_H diff --git a/simdpp/algorithm/count.h b/simdpp/algorithm/count.h new file mode 100644 index 00000000..489273c4 --- /dev/null +++ b/simdpp/algorithm/count.h @@ -0,0 +1,71 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + +Distributed under the Boost Software License, Version 1.0. +(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_COUNT_H +#define LIBSIMDPP_SIMDPP_ALGORITHM_COUNT_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +#include + +namespace simdpp { +namespace SIMDPP_ARCH_NAMESPACE { + +template +typename std::iterator_traits::difference_type +count(T const* first, T const* last, U val) +{ +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first) + throw std::runtime_error("count - null ptr first."); + if (!last) + throw std::runtime_error("count - null ptr last."); +#endif + + using simd_type_T = typename simd_traits::simd_type; + using simd_mask_T = typename simd_traits::simd_mask_type; + using return_type = typename std::iterator_traits::difference_type; + if (first == last) return (return_type)0; + //define loopcounter + const auto simd_size = simd_type_T::base_length; + + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto 
size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + //prologue + auto lastprologue = first + size_prologue_loop; + return_type res = std::count(first, lastprologue, val); + + //simd loop + auto i = size_prologue_loop; + + //workaraund not reduce_add for mask type + const simd_type_T on = splat(T(1)); + const simd_type_T off = splat(T(0)); + const simd_type_T valsimd = splat(U(val)); + + for (; i < size_simd_loop; i += simd_size) + { + const simd_type_T el = load(lastprologue); + const simd_mask_T mask = cmp_eq(el, valsimd); + const auto rescurrentsimd = blend(on, off, mask); + res += (return_type)reduce_add(rescurrentsimd); + lastprologue += simd_size; + } + res += std::count(lastprologue, last, val); + return res; +} + +} // namespace SIMDPP_ARCH_NAMESPACE +} // namespace simdpp + +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_COUNT_H diff --git a/simdpp/algorithm/count_if.h b/simdpp/algorithm/count_if.h new file mode 100644 index 00000000..ded8d649 --- /dev/null +++ b/simdpp/algorithm/count_if.h @@ -0,0 +1,69 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + +Distributed under the Boost Software License, Version 1.0. 
+(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_COUNT_IF_H +#define LIBSIMDPP_SIMDPP_ALGORITHM_COUNT_IF_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +#include + +namespace simdpp { +namespace SIMDPP_ARCH_NAMESPACE { + +template +typename std::iterator_traits::difference_type +count_if(T const* first, T const* last, UnaryPredicate pred) +{ +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first) + throw std::runtime_error("count_if - null ptr first."); + if (!last) + throw std::runtime_error("count_if - null ptr last."); +#endif + + using simd_type_T = typename simd_traits::simd_type; + using simd_mask_T = typename simd_traits::simd_mask_type; + using return_type = typename std::iterator_traits::difference_type; + if (first == last) return (return_type)0; + //define loopcounter + const auto simd_size = simd_type_T::base_length; + + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + //prologue + auto lastprologue = first + size_prologue_loop; + return_type res = std::count_if(first, lastprologue, pred); + + //simd loop + auto i = size_prologue_loop; + + //workaraund not reduce_add for mask type + const simd_type_T on = splat(T(1)); + const simd_type_T off = splat(T(0)); + for (; i < size_simd_loop; i += simd_size) + { + const simd_type_T el = load(lastprologue); + const simd_mask_T mask = pred(el); + const auto rescurrentsimd = blend(on, off, mask); + res += (return_type)reduce_add(rescurrentsimd); + lastprologue += simd_size; + } + res += std::count_if(lastprologue, last, pred); + return res; +} + +} // namespace SIMDPP_ARCH_NAMESPACE +} // namespace simdpp + +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_COUNT_IF_H diff --git a/simdpp/algorithm/equal.h 
b/simdpp/algorithm/equal.h new file mode 100644 index 00000000..092c798b --- /dev/null +++ b/simdpp/algorithm/equal.h @@ -0,0 +1,101 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_EQUAL_H +#define LIBSIMDPP_SIMDPP_ALGORITHM_EQUAL_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +#include +#include + +namespace simdpp { +namespace SIMDPP_ARCH_NAMESPACE { + +template +bool equal(const T* first1, const T* last1, const T* first2,BinaryPredicate pred) +{ + +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first1) + throw std::runtime_error("equal - null ptr first1."); + if (!last1) + throw std::runtime_error("equal - null ptr last1."); + if (!first2) + throw std::runtime_error("equal - null ptr first2."); +#endif + using simd_type_T = typename simd_traits::simd_type; + //using simd_mask_T = typename simd_traits::simd_mask_type; + auto alignment = simd_traits::alignment; + + //define loopcounter + const auto simd_size = simd_type_T::base_length; + + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first1, last1); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + //prologue + auto last1prologue = first1 + size_prologue_loop; + auto last2prologue = first2 + size_prologue_loop; + if(!std::equal(first1, last1prologue,first2,pred)) return false; + + auto i=size_prologue_loop; + //workaound no reduce_and for mask_type + const simd_type_T on=splat(T(1)); + const simd_type_T off=splat(T(0)); + //---main simd loop + if (detail::is_aligned(last2prologue, alignment)) + { + for (; i < size_simd_loop; i += simd_size) + { + const simd_type_T element1 = load(last1prologue); + const simd_type_T element2 = 
load(last2prologue); + const simd_type_T res=blend(on,off,pred(element1,element2)); //workaound no reduce_and for mask_type + if(!reduce_and(res)) return false; + last1prologue += simd_size; + last2prologue += simd_size; + } + } + else + { + for (; i < size_simd_loop; i += simd_size) + { + const simd_type_T element1 = load(last1prologue); + const simd_type_T element2 = load_u(last2prologue); + const simd_type_T res=blend(on,off,pred(element1,element2));//workaound no reduce_and for mask_type + if(!reduce_and(res)) return false; + last1prologue += simd_size; + last2prologue += simd_size; + } + } + + if(!std::equal(last1prologue, last1,last2prologue,pred)) return false; + return true; +} + +template +bool equal(const T* first1, const T* last1, const T* first2) +{ + struct local_bynary_predicate_equal + { + using simd_type_T = typename simd_traits::simd_type; + using simd_mask_T = typename simd_traits::simd_mask_type; + SIMDPP_INL bool operator()(const T& a0,const T& a1) const SIMDPP_NOEXCEPT {return a0==a1;} + SIMDPP_INL simd_mask_T operator()(const simd_type_T& a0,const simd_type_T& a1) const SIMDPP_NOEXCEPT {return cmp_eq(a0,a1);} + }; + return equal(first1,last1,first2,local_bynary_predicate_equal()); +} + +} // namespace SIMDPP_ARCH_NAMESPACE +} // namespace simdpp + +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_EQUAL_H diff --git a/simdpp/algorithm/fill.h b/simdpp/algorithm/fill.h new file mode 100644 index 00000000..b6b507b4 --- /dev/null +++ b/simdpp/algorithm/fill.h @@ -0,0 +1,77 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + +Distributed under the Boost Software License, Version 1.0. 
+(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_FILL_H +#define LIBSIMDPP_SIMDPP_ALGORITHM_FILL_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace simdpp { +namespace SIMDPP_ARCH_NAMESPACE { + +template +void fill(T* first, T* last, U value) +{ +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first) + throw std::runtime_error("fill - null ptr first."); + if (!last) + throw std::runtime_error("fill - null ptr last."); +#endif + using simd_type_T = typename simd_traits::simd_type; + const auto alignment = simd_traits::alignment; + + simd_type_T valsimd = splat((T)value); + + //Define loop counter + const auto simd_size = simd_type_T::base_length; + const auto size = std::distance(first, last); + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + auto i = 0u; + + //---prologue + for (; i < size_prologue_loop; ++i) + { + *first++=(T)value; + } + + //---main simd loop + for (; i < size_simd_loop; i += simd_size) + { + store(first, valsimd); + first += simd_size; + } + + + //---epilogue + for (; i < size; ++i) + { + *first++ = (T)value; + } + +} + +} // namespace SIMDPP_ARCH_NAMESPACE +} // namespace simdpp + +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_FILL_H diff --git a/simdpp/algorithm/find.h b/simdpp/algorithm/find.h new file mode 100644 index 00000000..213f6dba --- /dev/null +++ b/simdpp/algorithm/find.h @@ -0,0 +1,89 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + +Distributed under the Boost Software License, Version 1.0. 
+(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_FIND_H +#define LIBSIMDPP_SIMDPP_ALGORITHM_FIND_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +#include //find +#include +#include +#include + +namespace simdpp { +namespace SIMDPP_ARCH_NAMESPACE { + +template +T const* find(T const* first, T const* last, U val) +{ + struct UnaryPredicateEqualValue + { + public: + UnaryPredicateEqualValue(T val) :m_val(val), m_val_simd(splat(val)) {} + using simd_mask_T = typename simd_traits::simd_mask_type; + using simd_type_T = typename simd_traits::simd_type; + + SIMDPP_INL bool operator()(T a) const SIMDPP_NOEXCEPT { return a == m_val; } + SIMDPP_INL simd_mask_T operator()(const simd_type_T& a) const SIMDPP_NOEXCEPT { return cmp_eq(a, m_val_simd); } + private: + T m_val; + simd_type_T m_val_simd; + }; + +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first) + throw std::runtime_error("find - null ptr first."); + if (!last) + throw std::runtime_error("find - null ptr last."); +#endif + using simd_type_T = typename simd_traits::simd_type; + using simd_mask_T =typename simd_traits::simd_mask_type; + + if (first == last) return last; + + //define loopcounter + const auto simd_size = simd_type_T::base_length; + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + //prologue + auto lastprologue = first + size_prologue_loop; + const auto resprologue = std::find(first, lastprologue, val); + if (resprologue != lastprologue) return resprologue; + + //simd loop + auto i = size_prologue_loop; + + //workaraund not test_bits_any for mask type + const simd_type_T on = splat(T(1)); + const simd_type_T off = splat(T(0)); + const auto pred = UnaryPredicateEqualValue((T)val); + for (; i < 
size_simd_loop; i += simd_size) + { + //TR why can't test_bits_any not available for mask? + const simd_mask_T mask=pred(load(lastprologue)); + const auto res = blend(on, off,mask); + if (test_bits_any(res)) //match extract exact position + { + return std::find_if(lastprologue, lastprologue + simd_size, pred); //or extract position from res ? + } + lastprologue += simd_size; + } + + //epilogue + return std::find(lastprologue, last, val); +} + +} // namespace SIMDPP_ARCH_NAMESPACE +} // namespace simdpp +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_FIND_H diff --git a/simdpp/algorithm/find_if.h b/simdpp/algorithm/find_if.h new file mode 100644 index 00000000..f8e998f5 --- /dev/null +++ b/simdpp/algorithm/find_if.h @@ -0,0 +1,74 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + +Distributed under the Boost Software License, Version 1.0. +(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_FIND_IF_H +#define LIBSIMDPP_SIMDPP_ALGORITHM_FIND_IF_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +#include //find_if +#include +#include +#include + +namespace simdpp { +namespace SIMDPP_ARCH_NAMESPACE { + +template +T const* find_if(T const* first, T const* last, UnaryPredicate pred) +{ +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first) + throw std::runtime_error("find_if - null ptr first."); + if (!last) + throw std::runtime_error("find_if - null ptr last."); +#endif + using simd_type_T = typename simd_traits::simd_type; + using simd_mask_T = typename simd_traits::simd_mask_type; + + if (first == last) return last; + + //define loopcounter + const auto simd_size = simd_type_T::base_length; + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + //prologue + 
auto lastprologue = first + size_prologue_loop; + const auto resprologue = std::find_if(first, lastprologue, pred); + if (resprologue != lastprologue) return resprologue; + + //simd loop + auto i = size_prologue_loop; + + //workaraund not test_bits_any for mask type + const simd_type_T on = splat(T(1)); + const simd_type_T off = splat(T(0)); + for (; i < size_simd_loop; i += simd_size) + { + //TR why can't test_bits_any not available for mask? + const simd_mask_T mask = pred(load(lastprologue)); + const auto res = blend(on, off,mask); + if (test_bits_any(res)) //match extract exact position + { + return std::find_if(lastprologue, lastprologue + simd_size, pred); //or extract position from res ? + } + lastprologue += simd_size; + } + + //epilogue + return std::find_if(lastprologue, last, pred); +} + +} // namespace SIMDPP_ARCH_NAMESPACE +} // namespace simdpp +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_FIND_IF_H diff --git a/simdpp/algorithm/find_if_not.h b/simdpp/algorithm/find_if_not.h new file mode 100644 index 00000000..b237e51a --- /dev/null +++ b/simdpp/algorithm/find_if_not.h @@ -0,0 +1,75 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + +Distributed under the Boost Software License, Version 1.0. 
+(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_FIND_IF_NOT_H +#define LIBSIMDPP_SIMDPP_ALGORITHM_FIND_IF_NOT_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +#include //find_if +#include +#include +#include + +namespace simdpp { +namespace SIMDPP_ARCH_NAMESPACE { + +template +T const* find_if_not(T const* first, T const* last, UnaryPredicate pred) +{ +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first) + throw std::runtime_error("find_if - null ptr first."); + if (!last) + throw std::runtime_error("find_if - null ptr last."); +#endif + using simd_type_T = typename simd_traits::simd_type; + using simd_mask_T = typename simd_traits::simd_mask_type; + + if (first == last) return last; + + //define loopcounter + const auto simd_size = simd_type_T::base_length; + const auto size = std::distance(first, last); + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + //prologue + auto lastprologue = first + size_prologue_loop; + const auto resprologue = std::find_if_not(first, lastprologue, pred); + if (resprologue != lastprologue) return resprologue; + + //simd loop + auto i = size_prologue_loop; + + //workaraund not reduce_and for mask type + const simd_type_T on = splat(T(1)); + const simd_type_T off = splat(T(0)); + for (; i < size_simd_loop; i += simd_size) + { + //TR why can't reduce_and not available for mask? + simd_mask_T mask = pred(load(lastprologue)); + const auto res = blend(on, off, mask); + if (!reduce_and(res)) //match extract exact position + { + return std::find_if_not(lastprologue, lastprologue + simd_size, pred); //or extract position from res ? 
+ } + lastprologue += simd_size; + } + + //epilogue + return std::find_if_not(lastprologue, last, pred); +} + +} // namespace SIMDPP_ARCH_NAMESPACE +} // namespace simdpp +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_FIND_IF_NOT_H diff --git a/simdpp/algorithm/helper_input_range.h b/simdpp/algorithm/helper_input_range.h new file mode 100644 index 00000000..fe9fe2c4 --- /dev/null +++ b/simdpp/algorithm/helper_input_range.h @@ -0,0 +1,57 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_HELPER_INPUT_RANGE_H +#define LIBSIMDPP_SIMDPP_HELPER_INPUT_RANGE_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +#include +#include +#include + +namespace simdpp { +namespace SIMDPP_ARCH_NAMESPACE { +/** +Extract from contigous range [first,last[ + The two loop counter + -the scalar prologue [start,size_prologue_loop[ i.e the range defined between the original begin and the first + location to be properly aligned to be used through simd operators + - the main simd_loop_part,[size_prologue_loop,size_simd_loop[ i.e the range where we could apply simd operators + - Note epilogue equals [size_simd_loop,stop[ +*/ +template +const std::pair SIMDPP_INL helper_input_range(const T* first, const T* last) +{ +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first) + throw std::runtime_error("helper_input_range - null ptr first."); + if (!last) + throw std::runtime_error("helper_input_range - null ptr last."); +#endif + using simd_type_T = typename simd_traits::simd_type; + + const auto simd_size = simd_type_T::base_length; + const auto alignment = simd_traits::alignment; + + const auto size = last - first; + //get first aligned adress from first + const T* ptr_aligned_first =detail::reach_next_aligned(first, alignment); + // Next aligned 
address may be out of range, so make sure size_prologue_loop is not bigger than size + const auto size_prologue_loop = std::min(size,std::distance(first, ptr_aligned_first)); + const auto size_simd_loop = (size >= size_prologue_loop) ? (simd_size * ((size- size_prologue_loop) / simd_size)) : (0u); + + return std::make_pair(size_prologue_loop, size_simd_loop); +} + +} // namespace SIMDPP_ARCH_NAMESPACE +} // namespace simdpp + +#endif //LIBSIMDPP_SIMDPP_HELPER_INPUT_RANGE_H diff --git a/simdpp/algorithm/lexicographical_compare.h b/simdpp/algorithm/lexicographical_compare.h new file mode 100644 index 00000000..0862a220 --- /dev/null +++ b/simdpp/algorithm/lexicographical_compare.h @@ -0,0 +1,132 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_LEXICOGRAPHICAL8COMPARE_H +#define LIBSIMDPP_SIMDPP_ALGORITHM_LEXICOGRAPHICAL8COMPARE_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace simdpp { +namespace SIMDPP_ARCH_NAMESPACE { + +template +bool lexicographical_compare(const T* first1, const T* last1, const T* first2, const T* last2,BinarayPredicate comp) +{ +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first1) + throw std::runtime_error("lexicographical_compare - null ptr first1."); + if (!last1) + throw std::runtime_error("lexicographical_compare - null ptr last1."); + if (!first2) + throw std::runtime_error("lexicographical_compare - null ptr first2."); + if (!last2) + throw std::runtime_error("lexicographical_compare - null ptr last2."); +#endif + + using simd_type_T = typename simd_traits::simd_type; + + auto alignment = simd_traits::alignment; + using difference_type_T = typename 
std::iterator_traits::difference_type; + difference_type_T d1 = std::distance(first1, last1); + difference_type_T d2 = std::distance(first2, last2); + bool shorter = d1 < d2; + auto last = shorter ? last1 : first1+d2; + auto size = shorter ? d1 : d2; + + //define loopcounter + const auto simd_size = simd_type_T::base_length; + + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first1, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + auto i=0u; + //---prologue + for (; i < size_prologue_loop; ++i) + { + if (comp(*first1, *first2)) return true; + if (comp(*first2, *first1)) return false; + ++first1; + ++first2; + } + + //---main simd loop + if (detail::is_aligned(first2, alignment)) + { + for (; i < size_simd_loop; i += simd_size) + { + simd_type_T element1 = load(first1); + simd_type_T element2 = load(first2); + if(comp(element1, element2)) return true; + if(comp(element2, element1)) return false; + first1 += simd_size; + first2 += simd_size; + } + } + else + { + for (; i < size_simd_loop; i += simd_size) + { + simd_type_T element1 = load(first1); + simd_type_T element2 = load_u(first2); + if(comp(element1, element2)) return true; + if(comp(element2, element1)) return false; + first1 += simd_size; + first2 += simd_size; + } + } + //---epilogue + for (; i < size; ++i) + { + if (comp(*first1, *first2)) return true; + if (comp(*first2, *first1)) return false; + ++first1; + ++first2; + } + return shorter; +} + +template +bool lexicographical_compare(const T* first1, const T* last1, const T* first2, const T* last2) +{ + struct local_binary_predicate_less + { + using simd_type_T = typename simd_traits::simd_type; + + local_binary_predicate_less():on(splat(T(1))),off(splat(T(0))) {} + + SIMDPP_INL bool operator()(T a0,T a1) const SIMDPP_NOEXCEPT { return a0 + Copyright (C) 2018 Thomas Retornaz + +Distributed under the Boost Software License, Version 1.0. 
+(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_MAX_H +#define LIBSIMDPP_SIMDPP_ALGORITHM_MAX_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +#include //numeric_limits +#include //max +#include +#include +#include +#include + +namespace simdpp { +namespace SIMDPP_ARCH_NAMESPACE { + +//Returns the value of the element with the largest value in the range[first, last[ over comp, +//The lowest possible value for the order if the range is empty. +template +T max(T const* first, T const* last, Comp comp) +{ +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first) + throw std::runtime_error("max - null ptr first."); + if (!last) + throw std::runtime_error("max - null ptr last."); +#endif + using simd_type_T = typename simd_traits::simd_type; + using simd_mask_T = typename simd_traits::simd_mask_type; + + if (first == last) return comp(T(0), T(1)) ? std::numeric_limits::max() : std::numeric_limits::lowest(); //stolen from boost::simd + + //Define loop counter + const auto simd_size = simd_type_T::base_length; + const auto size = std::distance(first, last); + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + auto i = 0u; + auto max_val = *first; //initialize with thirst element + + //---prologue + for (; i < size_prologue_loop; ++i) + { + if (comp(*first, max_val)) + { + max_val = *first; + } + first++; + } + + //---main simd loop + simd_type_T current_max_simd = splat(max_val); + for (; i < size_simd_loop; i += simd_size) + { + const simd_type_T element = load(first); + const simd_mask_T mask = comp(current_max_simd, element); + current_max_simd = blend(current_max_simd, element, mask); + first += simd_size; + } + //extract max from simdtype + for_each(current_max_simd, [&](T 
el) {if (comp(el, max_val)) { max_val = el; }}); + + //---epilogue + for (; i < size; ++i) + { + if (comp(*first, max_val)) + { + max_val = *first; + } + first++; + } + return max_val; +} + +//Returns the value of the element with the largest value in the range[first, last[, +//The lowest possible value for the order if the range is empty. +template +T max(T const* first, T const* last) +{ +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first) + throw std::runtime_error("max - null ptr first."); + if (!last) + throw std::runtime_error("max - null ptr last."); +#endif + using simd_type_T = typename simd_traits::simd_type; + + if (first == last) return std::numeric_limits::lowest(); + + //Define loop counter + const auto simd_size = simd_type_T::base_length; + const auto size = std::distance(first, last); + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + auto i = 0u; + auto max_val = *first; //initialize with thirst element + + //---prologue + for (; i < size_prologue_loop; ++i) + { + max_val = std::max(max_val, *first++); + } + //---main simd loop + simd_type_T current_max_simd = splat(max_val); + for (; i < size_simd_loop; i += simd_size) + { + const simd_type_T el = load(first); + current_max_simd = max(current_max_simd, el); + first += simd_size; + } + //extract max from simdtype + max_val = reduce_max(current_max_simd); + + //---epilogue + for (; i < size; ++i) + { + max_val = std::max(max_val, *first++); + } + + return max_val; +} + +} // namespace SIMDPP_ARCH_NAMESPACE +} // namespace simdpp +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_MAX_H diff --git a/simdpp/algorithm/max_element.h b/simdpp/algorithm/max_element.h new file mode 100644 index 00000000..06e46f66 --- /dev/null +++ b/simdpp/algorithm/max_element.h @@ -0,0 +1,44 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 
Thomas Retornaz
+
+Distributed under the Boost Software License, Version 1.0.
+(See accompanying file LICENSE_1_0.txt or copy at
+http://www.boost.org/LICENSE_1_0.txt)
+*/
+
+#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_MAX_ELEMENT_H
+#define LIBSIMDPP_SIMDPP_ALGORITHM_MAX_ELEMENT_H
+
+#ifndef LIBSIMDPP_SIMD_H
+#error "This file must be included through simd.h"
+#endif
+
+#include //numeric_limits
+#include //max
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace simdpp {
+namespace SIMDPP_ARCH_NAMESPACE {
+
+//Locates the value computed by simdpp::max(first, last, comp) inside [first, last[.
+//Returns last for an empty range, otherwise whatever find(first, last, value) yields.
+//The range is traversed twice: once by max, once by find.
+template
+T const * max_element(T const* first, T const* last, Compare comp)
+{
+    if (first == last) return last;
+    return find(first, last, simdpp::max(first, last, comp));
+}
+
+//Same as above, using simdpp::max(first, last) to compute the value to look for.
+template
+T const * max_element(T const* first, T const* last)
+{
+    if (first == last) return last;
+    return find(first, last, simdpp::max(first, last));
+}
+
+} // namespace SIMDPP_ARCH_NAMESPACE
+} // namespace simdpp
+#endif //LIBSIMDPP_SIMDPP_ALGORITHM_MAX_ELEMENT_H
diff --git a/simdpp/algorithm/min.h b/simdpp/algorithm/min.h
new file mode 100644
index 00000000..9244d541
--- /dev/null
+++ b/simdpp/algorithm/min.h
@@ -0,0 +1,142 @@
+/* Copyright (C) 2018 Povilas Kanapickas
+    Copyright (C) 2018 Thomas Retornaz
+
+Distributed under the Boost Software License, Version 1.0.
+(See accompanying file LICENSE_1_0.txt or copy at
+http://www.boost.org/LICENSE_1_0.txt)
+*/
+
+#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_MIN_H
+#define LIBSIMDPP_SIMDPP_ALGORITHM_MIN_H
+
+#ifndef LIBSIMDPP_SIMD_H
+#error "This file must be included through simd.h"
+#endif
+
+#include //numeric_limits
+#include //min
+#include
+#include
+#include
+#include
+
+namespace simdpp {
+namespace SIMDPP_ARCH_NAMESPACE {
+
+//Returns the value of the element with the smallest value in the range[first, last[ over comp,
+//The largest possible value for the order if the range is empty.
+//In the scalar loops an element replaces the running result when comp(result, element) is true.
+//For an empty range the result is numeric_limits lowest() when comp(T(0), T(1)) is true, max() otherwise.
+//Null pointers are rejected with std::runtime_error only when SIMDPP_DEBUG is defined,
+//so an empty range given as null pointers yields the empty-range value in regular builds.
+template
+T min(T const* first, T const* last, Comp comp)
+{
+#ifdef SIMDPP_DEBUG //precondition debug mode
+    if (!first)
+        throw std::runtime_error("min - null ptr first.");
+    if (!last)
+        throw std::runtime_error("min - null ptr last.");
+#endif
+    using simd_type_T = typename simd_traits::simd_type;
+    using simd_mask_T = typename simd_traits::simd_mask_type;
+
+    if (first == last) return comp(T(0), T(1)) ? std::numeric_limits::lowest() : std::numeric_limits::max(); //stolen from boost::simd
+
+    //Define loop counter
+    const auto simd_size = simd_type_T::base_length;
+    const auto size = std::distance(first, last);
+    //note enforce that input is aligned when we start the main simd loop
+    const auto range = helper_input_range(first, last);
+    const auto size_prologue_loop = range.first;
+    const auto size_simd_loop = range.second;
+
+    auto i = 0u;
+    auto min_val = *first; //initialize with first element
+
+    //---prologue
+    for (; i < size_prologue_loop; ++i)
+    {
+        if (comp(min_val, *first))
+        {
+            min_val = *first;
+        }
+        first++;
+    }
+
+    //---main simd loop
+    simd_type_T current_min_simd = splat(min_val);
+    for (; i < size_simd_loop; i += simd_size)
+    {
+        const simd_type_T element = load(first);
+        const simd_mask_T mask = comp(element, current_min_simd);
+        current_min_simd = blend(current_min_simd, element, mask);
+        first += simd_size;
+    }
+    //extract min from simdtype
+    for_each(current_min_simd, [&](T el) {if (comp(min_val, el)) { min_val = el; }});
+
+    //---epilogue
+    for (; i < size; ++i)
+    {
+        if (comp(min_val, *first))
+        {
+            min_val = *first;
+        }
+        first++;
+    }
+    return min_val;
+}
+
+//Returns the value of the element with the smallest value in the range[first, last[,
+//The largest possible value for the order if the range is empty.
+template +T min(T const* first, T const* last) +{ +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first) + throw std::runtime_error("min - null ptr first."); + if (!last) + throw std::runtime_error("min - null ptr last."); +#endif + using simd_type_T = typename simd_traits::simd_type; + + if (first == last) return std::numeric_limits::max(); + + //Define loop counter + const auto simd_size = simd_type_T::base_length; + const auto size = std::distance(first, last); + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + auto i = 0u; + auto min_val = *first; //initialize with thirst element + + //---prologue + for (; i < size_prologue_loop; ++i) + { + min_val = std::min(min_val, *first++); + } + + //---main simd loop + simd_type_T current_min_simd = splat(min_val); + for (; i < size_simd_loop; i += simd_size) + { + const simd_type_T el = load(first); + current_min_simd = min(el, current_min_simd); + first += simd_size; + } + //extract min from simdtype + min_val = reduce_min(current_min_simd); + + //---epilogue + for (; i < size; ++i) + { + min_val = std::min(min_val, *first++); + } + + return min_val; +} + +} // namespace SIMDPP_ARCH_NAMESPACE +} // namespace simdpp + +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_MIN_H diff --git a/simdpp/algorithm/min_element.h b/simdpp/algorithm/min_element.h new file mode 100644 index 00000000..b33a7df7 --- /dev/null +++ b/simdpp/algorithm/min_element.h @@ -0,0 +1,45 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + +Distributed under the Boost Software License, Version 1.0. 
+(See accompanying file LICENSE_1_0.txt or copy at
+http://www.boost.org/LICENSE_1_0.txt)
+*/
+
+#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_MIN_ELEMENT_H
+#define LIBSIMDPP_SIMDPP_ALGORITHM_MIN_ELEMENT_H
+
+#ifndef LIBSIMDPP_SIMD_H
+#error "This file must be included through simd.h"
+#endif
+
+#include //numeric_limits
+#include //min
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace simdpp {
+namespace SIMDPP_ARCH_NAMESPACE {
+
+//Locates the value computed by simdpp::min(first, last, comp) inside [first, last[.
+//Returns last for an empty range, otherwise whatever find(first, last, value) yields.
+//The range is traversed twice: once by min, once by find.
+template
+T const * min_element(T const* first, T const* last, Compare comp)
+{
+    if (first == last) return last;
+    return find(first, last, simdpp::min(first, last, comp));
+}
+
+//Same as above, using simdpp::min(first, last) to compute the value to look for.
+template
+T const * min_element(T const* first, T const* last)
+{
+    if (first == last) return last;
+    return find(first, last, simdpp::min(first, last));
+}
+
+} // namespace SIMDPP_ARCH_NAMESPACE
+} // namespace simdpp
+
+#endif //LIBSIMDPP_SIMDPP_ALGORITHM_MIN_ELEMENT_H
diff --git a/simdpp/algorithm/none_of.h b/simdpp/algorithm/none_of.h
new file mode 100644
index 00000000..1adcb49d
--- /dev/null
+++ b/simdpp/algorithm/none_of.h
@@ -0,0 +1,36 @@
+/* Copyright (C) 2018 Povilas Kanapickas
+    Copyright (C) 2018 Thomas Retornaz
+
+    Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_NONE_OF_H +#define LIBSIMDPP_SIMDPP_ALGORITHM_NONE_OF_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +#include + +namespace simdpp { +namespace SIMDPP_ARCH_NAMESPACE { + +template +bool none_of(T const* first, T const* last, UnaryPredicate pred) +{ +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first) + throw std::runtime_error("none_of - null ptr first."); + if (!last) + throw std::runtime_error("none_of - null ptr last."); +#endif + return !any_of(first,last,pred); +} + +} // namespace SIMDPP_ARCH_NAMESPACE +} // namespace simdpp + +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_NONE_OF_H diff --git a/simdpp/algorithm/reduce.h b/simdpp/algorithm/reduce.h new file mode 100644 index 00000000..04b87eb9 --- /dev/null +++ b/simdpp/algorithm/reduce.h @@ -0,0 +1,124 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_REDUCE_H +#define LIBSIMDPP_SIMDPP_ALGORITHM_REDUCE_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace simdpp { +namespace SIMDPP_ARCH_NAMESPACE { + +template +T reduce(T const* first, T const* last, T init) +{ +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first) + throw std::runtime_error("reduce - null ptr first."); + if (!last) + throw std::runtime_error("reduce - null ptr last."); +#endif + using simd_type_T = typename simd_traits::simd_type; + + simd_type_T accusimd = splat((T)0); + + //Define loop counter + const auto simd_size = simd_type_T::base_length; + const auto size = std::distance(first, last); + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + auto i = 0u; + //---prologue + for (; i < size_prologue_loop; ++i) + { + init += *first++; + } + //---main simd loop + for (; i < size_simd_loop; i += simd_size) + { + simd_type_T element = load(first); + accusimd = accusimd + element; //TODO need += + first += simd_size; + } + + + //---epilogue + for (; i < size; ++i) + { + init += *first++; + } + + //sum simd residual + init += reduce_add(accusimd); + return init; +} + +template +T reduce(T const* first, T const* last, T init, T neutral, BinOp f) //need neutral element for simd part +{ +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first) + throw std::runtime_error("reduce - null ptr first."); + if (!last) + throw std::runtime_error("reduce - null ptr last."); +#endif + using simd_type_T = typename simd_traits::simd_type; + const auto alignment = simd_traits::alignment; + + //Define loop counter + const auto size 
= std::distance(first, last); + const auto simd_size = simd_type_T::base_length; + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + auto i = 0; + simd_type_T accusimd = splat(T(neutral)); //think about product sum + + //---prologue + for (; i < size_prologue_loop; ++i) + { + init = f(init, *first++); + } + //---main simd loop + for (; i < size_simd_loop; i += simd_size) + { + simd_type_T element = load(first); + accusimd = f(accusimd, element); + first += simd_size; + } + //---epilogue + for (; i < size; ++i) + { + init = f(init, *first++); + } + + //reduce simd residual + for_each(accusimd, [&](T el) { init = f(init, el); }); + return init; +} +} // namespace SIMDPP_ARCH_NAMESPACE +} // namespace simdpp + +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_REDUCE_H + diff --git a/simdpp/algorithm/replace.h b/simdpp/algorithm/replace.h new file mode 100644 index 00000000..31d2d94c --- /dev/null +++ b/simdpp/algorithm/replace.h @@ -0,0 +1,42 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + + Distributed under the Boost Software License, Version 1.0. 
+    (See accompanying file LICENSE_1_0.txt or copy at
+    http://www.boost.org/LICENSE_1_0.txt)
+*/
+
+#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_REPLACE_H
+#define LIBSIMDPP_SIMDPP_ALGORITHM_REPLACE_H
+
+#ifndef LIBSIMDPP_SIMD_H
+#error "This file must be included through simd.h"
+#endif
+
+#include
+
+namespace simdpp {
+namespace SIMDPP_ARCH_NAMESPACE {
+
+//Replaces, in place, every element of [first, last[ equal to old_val by new_val.
+//Implemented as transform(first, last, first, predicate) with a local functor that
+//offers a scalar and a simd call operator.
+template
+void replace(T* first, T* last, T const & old_val, T const & new_val)
+{
+    struct local_predicate
+    {
+        using simd_type_T = typename simd_traits::simd_type;
+        //both values are kept as scalars and as splatted simd vectors
+        local_predicate(const T & old_val, const T & new_val) : m_old_val(old_val), m_new_val(new_val),m_old_val_simd(splat(old_val)), m_new_val_simd(splat(new_val)) {}
+
+        //scalar path: new value when a matches the old value, a otherwise
+        SIMDPP_INL T operator()( const T& a) const SIMDPP_NOEXCEPT { return a == m_old_val ? m_new_val : a;}
+        //simd path: blend of the new value and a, driven by the cmp_eq mask against the old value
+        SIMDPP_INL simd_type_T operator()(const simd_type_T& a) const SIMDPP_NOEXCEPT { return blend(m_new_val_simd,a,cmp_eq(a,m_old_val_simd)); }
+
+        T m_old_val, m_new_val;
+        simd_type_T m_old_val_simd, m_new_val_simd;
+    };
+
+    transform(first, last, first, local_predicate(old_val, new_val));
+}
+
+} // namespace SIMDPP_ARCH_NAMESPACE
+} // namespace simdpp
+
+#endif //LIBSIMDPP_SIMDPP_ALGORITHM_REPLACE_H
diff --git a/simdpp/algorithm/replace_if.h b/simdpp/algorithm/replace_if.h
new file mode 100644
index 00000000..5c3f8b5e
--- /dev/null
+++ b/simdpp/algorithm/replace_if.h
@@ -0,0 +1,42 @@
+/* Copyright (C) 2018 Povilas Kanapickas
+    Copyright (C) 2018 Thomas Retornaz
+
+    Distributed under the Boost Software License, Version 1.0.
+    (See accompanying file LICENSE_1_0.txt or copy at
+    http://www.boost.org/LICENSE_1_0.txt)
+*/
+
+#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_REPLACE_IF_H
+#define LIBSIMDPP_SIMDPP_ALGORITHM_REPLACE_IF_H
+
+#ifndef LIBSIMDPP_SIMD_H
+#error "This file must be included through simd.h"
+#endif
+
+#include
+
+namespace simdpp {
+namespace SIMDPP_ARCH_NAMESPACE {
+
+//Replaces, in place, every element of [first, last[ for which pred holds by new_val.
+//pred is called both with a scalar (result used as a condition) and with a simd
+//vector (result used as the blend mask), so it must provide both overloads.
+template
+void replace_if(T* first, T* last, UnaryPredicate pred , const T& new_val)
+{
+    struct local_predicate
+    {
+        using simd_type_T = typename simd_traits::simd_type;
+        //keeps a copy of the predicate and the new value as scalar and as splatted simd vector
+        local_predicate(const UnaryPredicate& pred,const T & new_val) : m_new_val(new_val),m_new_val_simd(splat(new_val)),m_pred(pred) {}
+
+        //scalar path: new value when the predicate holds, a otherwise
+        SIMDPP_INL T operator()( const T& a) const SIMDPP_NOEXCEPT { return m_pred(a) ? m_new_val : a;}
+        //simd path: blend of the new value and a, driven by the predicate's mask
+        SIMDPP_INL simd_type_T operator()(const simd_type_T& a) const SIMDPP_NOEXCEPT { return blend(m_new_val_simd,a,m_pred(a)); }
+
+        T m_new_val;
+        simd_type_T m_new_val_simd;
+        UnaryPredicate m_pred;
+    };
+
+    transform(first, last, first, local_predicate(pred, new_val));
+}
+} // namespace SIMDPP_ARCH_NAMESPACE
+} // namespace simdpp
+
+#endif //LIBSIMDPP_SIMDPP_ALGORITHM_REPLACE_IF_H
diff --git a/simdpp/algorithm/transform.h b/simdpp/algorithm/transform.h
new file mode 100644
index 00000000..af78e85c
--- /dev/null
+++ b/simdpp/algorithm/transform.h
@@ -0,0 +1,174 @@
+/* Copyright (C) 2018 Povilas Kanapickas
+    Copyright (C) 2018 Thomas Retornaz
+
+    Distributed under the Boost Software License, Version 1.0.
+    (See accompanying file LICENSE_1_0.txt or copy at
+    http://www.boost.org/LICENSE_1_0.txt)
+*/
+
+#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_TRANSFORM_H
+#define LIBSIMDPP_SIMDPP_ALGORITHM_TRANSFORM_H
+
+#ifndef LIBSIMDPP_SIMD_H
+#error "This file must be included through simd.h"
+#endif
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace simdpp {
+namespace SIMDPP_ARCH_NAMESPACE {
+
+//Applies f to every element of [first, last[ and writes the results to out.
+//Returns out advanced past the last element written.
+//f is called with scalars in the prologue/epilogue and with simd vectors in the main loop.
+//T and U must map to simd types of identical base_length (static_assert below).
+//Null pointers are rejected with std::runtime_error when SIMDPP_DEBUG is defined.
+template
+U* transform(T const* first, T const* last, U* out, UnOp f)
+{
+#ifdef SIMDPP_DEBUG //precondition debug mode
+    if (!first)
+        throw std::runtime_error("transform - null ptr first.");
+    if (!last)
+        throw std::runtime_error("transform - null ptr last.");
+    if (!out)
+        throw std::runtime_error("transform - null ptr out.");
+#endif
+    using simd_type_T = typename simd_traits::simd_type;
+    using simd_type_U = typename simd_traits::simd_type;
+
+    static_assert (simd_type_T::base_length == simd_type_U::base_length
+        , "mismatch base_length between T and U"
+        );
+
+    const auto alignment = simd_traits::alignment;
+
+    //Define loop counter
+    const auto simd_size = simd_type_T::base_length;
+    const auto size = std::distance(first, last);
+    //note enforce that input is aligned when we start the main simd loop
+    const auto range = helper_input_range(first, last);
+    const auto size_prologue_loop = range.first;
+    const auto size_simd_loop = range.second;
+
+    auto i = 0;
+
+    //---prologue
+    for (; i < size_prologue_loop; ++i)
+    {
+        *out++ = f(*first++);
+    }
+
+    //---main simd loop
+    //out is tested once after the prologue; aligned load/store are used only when it is aligned
+    if (detail::is_aligned(out, alignment))
+    {
+        for (; i < size_simd_loop; i += simd_size)
+        {
+            simd_type_T element = load(first);
+            store(out, f(element));
+            first += simd_size;
+            out += simd_size;
+        }
+    }
+    else
+    {
+        for (; i < size_simd_loop; i += simd_size)
+        {
+            simd_type_T element = load_u(first);
+            store_u(out, f(element));
+            first += simd_size;
+            out += simd_size;
+        }
+    }
+    //---epilogue
+    for (; i < size; ++i)
+    {
+        *out++ = f(*first++);
+    }
+    return out;
+}
+template +U* transform(T1 const* first1, T1 const* last1, T2 const* first2, U* out, BinOp f) +{ +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first1) + throw std::runtime_error("transform - null ptr first1."); + if (!last1) + throw std::runtime_error("transform - null ptr last1."); + if (!first2) + throw std::runtime_error("transform - null ptr first2."); + if (!out) + throw std::runtime_error("transform - null ptr out."); +#endif + using simd_type_T1 = typename simd_traits::simd_type; + using simd_type_T2 = typename simd_traits::simd_type; + using simd_type_U = typename simd_traits::simd_type; + + static_assert (simd_type_T1::base_length == simd_type_T2::base_length + , "mismatch base_length between T1 and T2" + ); + static_assert (simd_type_T1::base_length == simd_type_U::base_length + , "mismatch base_length between T1 and U" + ); + static_assert (simd_type_T2::base_length == simd_type_U::base_length + , "mismatch base_length between T2 and U" + ); + + auto alignment = simd_traits::alignment; + + //Define loop counter + const auto simd_size = simd_type_T1::base_length; + const auto size = std::distance(first1, last1); + const auto range = helper_input_range(first1, last1); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + + auto i = 0; + + //---prologue + for (; i < size_prologue_loop; ++i) + { + *out++ = f(*first1++, *first2++); + } + + //---main simd loop + if (detail::is_aligned(first2, alignment) && detail::is_aligned(out, alignment)) + { + for (; i < size_simd_loop; i += simd_size) + { + simd_type_T1 element1 = load(first1); + simd_type_T2 element2 = load(first2); + store(out, f(element1, element2)); + first1 += simd_size; + first2 += simd_size; + out += simd_size; + } + } + else + { + for (; i < size_simd_loop; i += simd_size) + { + simd_type_T1 element1 = load(first1); + simd_type_T2 element2 = load_u(first2); + store_u(out, f(element1, element2)); + first1 += simd_size; + first2 += simd_size; + out 
+= simd_size; + } + } + //---epilogue + for (; i < size; ++i) + { + *out++ = f(*first1++, *first2++); + } + return out; +} +} // namespace SIMDPP_ARCH_NAMESPACE +} // namespace simdpp + +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_TRANSFORM_H + diff --git a/simdpp/algorithm/transform_reduce.h b/simdpp/algorithm/transform_reduce.h new file mode 100644 index 00000000..8c314b38 --- /dev/null +++ b/simdpp/algorithm/transform_reduce.h @@ -0,0 +1,151 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_TRANSFORM_REDUCE_H +#define LIBSIMDPP_SIMDPP_ALGORITHM_TRANSFORM_REDUCE_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +#include +#include +#include +#include +#include +#include + +namespace simdpp { +namespace SIMDPP_ARCH_NAMESPACE { +template +T transform_reduce( const T* first, const T* last, T init, BinaryOp binary_op,UnaryOp unary_op) +{ +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first) + throw std::runtime_error("transform_reduce - null ptr first."); + if (!last) + throw std::runtime_error("transform_reduce - null ptr last."); +#endif + using simd_type_T = typename simd_traits::simd_type; + + //Define loop counter + const auto simd_size = simd_type_T::base_length; + const auto size = std::distance(first, last); + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + auto i = 0u; + //---prologue + for (; i < size_prologue_loop; ++i) + { + init = binary_op(init,unary_op(*first++)); + } + + simd_type_T accusimd=splat((T)0); + //---main simd loop + for (; i < size_simd_loop; i += simd_size) + { + simd_type_T element = 
unary_op(load(first)); + accusimd = binary_op(accusimd,element); + first += simd_size; + } + //reduce simd residual + for_each(accusimd, [&](T el) { init = binary_op(init, el); }); + + //---epilogue + for (; i < size; ++i) + { + init = binary_op(init,unary_op(*first++)); + } + return init; + +} + +template +U transform_reduce(const T1* first1, const T1* last1, const T2* first2, U init, BinaryOp1 binary_op1, BinaryOp2 binary_op2) +{ +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first1) + throw std::runtime_error("transform_reduce - null ptr first1."); + if (!last1) + throw std::runtime_error("transform_reduce - null ptr last1."); + if (!first2) + throw std::runtime_error("transform_reduce - null ptr first2."); +#endif + using simd_type_T1 = typename simd_traits::simd_type; + using simd_type_T2 = typename simd_traits::simd_type; + using simd_type_U = typename simd_traits::simd_type; + + static_assert (simd_type_T1::base_length == simd_type_T2::base_length + , "mismatch base_length between T1 and T2" + ); + static_assert (simd_type_T1::base_length == simd_type_U::base_length + , "mismatch base_length between T1 and U" + ); + static_assert (simd_type_T2::base_length == simd_type_U::base_length + , "mismatch base_length between T2 and U" + ); + + auto alignment = simd_traits::alignment; + + //Define loop counter + const auto simd_size = simd_type_T1::base_length; + const auto size = std::distance(first1, last1); + const auto range = helper_input_range(first1, last1); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + + auto i = 0u; + //---Prologue + for(;i +#include namespace simdpp { namespace SIMDPP_ARCH_NAMESPACE { @@ -42,6 +43,25 @@ const T* assume_aligned(const T* x, unsigned bytes) #endif } +SIMDPP_INL bool is_aligned(const void* ptr, std::size_t alignment) noexcept +{ + assert(((alignment & (alignment - 1)) == 0)); + return ((std::size_t)ptr & (alignment - 1)) == 0; //from boost\align\detail\is_aligned.hpp +} + 
+//Returns true when val is a multiple of alignment.
+//alignment is asserted to satisfy (alignment & (alignment - 1)) == 0.
+SIMDPP_INL bool is_aligned(std::size_t val, std::size_t alignment) noexcept
+{
+    assert(((alignment & (alignment - 1)) == 0));
+    return (val & (alignment - 1)) == 0; //from boost\align\detail\is_aligned.hpp
+}
+
+//Rounds the address held by ptr up to the next multiple of alignment and returns it as a pointer.
+//An address that is already a multiple of alignment is returned unchanged.
+template
+SIMDPP_INL T* reach_next_aligned(T* ptr, std::size_t alignment) noexcept
+{
+    assert(((alignment & (alignment - 1)) == 0));
+    return reinterpret_cast(((std::size_t)ptr + alignment - 1) &~(alignment - 1)); //from boost\align\detail\align_up.hpp
+}
+
 } // namespace detail
 } // namespace SIMDPP_ARCH_NAMESPACE
 } // namespace simdpp
diff --git a/simdpp/dispatch/dispatcher.h b/simdpp/dispatch/dispatcher.h
index 03c2f6f2..1c638f3d 100644
--- a/simdpp/dispatch/dispatcher.h
+++ b/simdpp/dispatch/dispatcher.h
@@ -67,7 +67,7 @@ struct FnVersion {
 const char* arch_name;
 };
 
-inline FnVersion select_version_any(FnVersion* versions, unsigned size,
+inline FnVersion select_version_any(FnVersion* versions, std::size_t size,
 Arch arch)
 {
 // No need to try to be very efficient here.
@@ -76,7 +76,7 @@ inline FnVersion select_version_any(FnVersion* versions, unsigned size,
 return lhs.needed_arch > rhs.needed_arch;
 });
 
- unsigned i;
+ std::size_t i;
 for (i = 0; i < size; ++i) {
 if (versions[i].fun_ptr == nullptr)
 continue;
diff --git a/simdpp/dispatch/get_arch_string_list.h b/simdpp/dispatch/get_arch_string_list.h
index 1907892f..dbe76937 100644
--- a/simdpp/dispatch/get_arch_string_list.h
+++ b/simdpp/dispatch/get_arch_string_list.h
@@ -84,10 +84,10 @@ inline Arch get_arch_string_list(const char* const strings[], int count, const c
 return res;
 #endif
 
- int prefixlen = std::strlen(prefix);
- for (int i = 0; i < count; ++i) {
+ auto prefixlen = std::strlen(prefix);
+ for (auto i = 0; i < count; ++i) {
 const char* s = *strings++;
- int len = std::strlen(s);
+ size_t len = std::strlen(s);
 
 // check if s matches prefix
 if (len < prefixlen)
diff --git a/simdpp/setup_arch.h b/simdpp/setup_arch.h
index a644aa94..4cf9ab7c 100644
--- a/simdpp/setup_arch.h
+++
b/simdpp/setup_arch.h @@ -390,6 +390,12 @@ #error "Unsupported compiler" #endif +#if _MSC_VER && _MSC_VER<=1800 +#define SIMDPP_NOEXCEPT +#else +#define SIMDPP_NOEXCEPT noexcept +#endif + #define SIMDPP_LIBRARY_VERSION_CXX11 1 #define SIMDPP_LIBRARY_VERSION_CXX98 0 diff --git a/simdpp/types/traits.h b/simdpp/types/traits.h index a1302e26..0c93aa60 100644 --- a/simdpp/types/traits.h +++ b/simdpp/types/traits.h @@ -1,19 +1,20 @@ /* Copyright (C) 2012 Povilas Kanapickas - Distributed under the Boost Software License, Version 1.0. - (See accompanying file LICENSE_1_0.txt or copy at - http://www.boost.org/LICENSE_1_0.txt) +Distributed under the Boost Software License, Version 1.0. +(See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) */ #ifndef LIBSIMDPP_SIMDPP_TYPES_TRAITS_H #define LIBSIMDPP_SIMDPP_TYPES_TRAITS_H #ifndef LIBSIMDPP_SIMD_H - #error "This file must be included through simd.h" +#error "This file must be included through simd.h" #endif #include #include + namespace simdpp { namespace SIMDPP_ARCH_NAMESPACE { @@ -21,54 +22,153 @@ namespace SIMDPP_ARCH_NAMESPACE { template struct is_vector : std::false_type {}; -template struct is_vector> : std::true_type {}; -template struct is_vector> : std::true_type {}; -template struct is_vector> : std::true_type {}; -template struct is_vector> : std::true_type {}; -template struct is_vector> : std::true_type {}; -template struct is_vector> : std::true_type {}; -template struct is_vector> : std::true_type {}; -template struct is_vector> : std::true_type {}; -template struct is_vector> : std::true_type {}; -template struct is_vector> : std::true_type {}; -template struct is_vector> : std::true_type {}; -template struct is_vector> : std::true_type {}; -template struct is_vector> : std::true_type {}; -template struct is_vector> : std::true_type {}; -template struct is_vector> : std::true_type {}; -template struct is_vector> : std::true_type {}; +template struct is_vector> : 
std::true_type {}; +template struct is_vector> : std::true_type {}; +template struct is_vector> : std::true_type {}; +template struct is_vector> : std::true_type {}; +template struct is_vector> : std::true_type {}; +template struct is_vector> : std::true_type {}; +template struct is_vector> : std::true_type {}; +template struct is_vector> : std::true_type {}; +template struct is_vector> : std::true_type {}; +template struct is_vector> : std::true_type {}; +template struct is_vector> : std::true_type {}; +template struct is_vector> : std::true_type {}; +template struct is_vector> : std::true_type {}; +template struct is_vector> : std::true_type {}; +template struct is_vector> : std::true_type {}; +template struct is_vector> : std::true_type {}; /// Allows detection whether specific type is a simdpp value (i.e. not expression) vector template struct is_value_vector : std::false_type {}; -template struct is_value_vector> : std::true_type {}; -template struct is_value_vector> : std::true_type {}; -template struct is_value_vector> : std::true_type {}; -template struct is_value_vector> : std::true_type {}; -template struct is_value_vector> : std::true_type {}; -template struct is_value_vector> : std::true_type {}; -template struct is_value_vector> : std::true_type {}; -template struct is_value_vector> : std::true_type {}; -template struct is_value_vector> : std::true_type {}; -template struct is_value_vector> : std::true_type {}; -template struct is_value_vector> : std::true_type {}; -template struct is_value_vector> : std::true_type {}; -template struct is_value_vector> : std::true_type {}; -template struct is_value_vector> : std::true_type {}; -template struct is_value_vector> : std::true_type {}; -template struct is_value_vector> : std::true_type {}; +template struct is_value_vector> : std::true_type {}; +template struct is_value_vector> : std::true_type {}; +template struct is_value_vector> : std::true_type {}; +template struct is_value_vector> : std::true_type {}; 
+template struct is_value_vector> : std::true_type {};
+template struct is_value_vector> : std::true_type {};
+template struct is_value_vector> : std::true_type {};
+template struct is_value_vector> : std::true_type {};
+template struct is_value_vector> : std::true_type {};
+template struct is_value_vector> : std::true_type {};
+template struct is_value_vector> : std::true_type {};
+template struct is_value_vector> : std::true_type {};
+template struct is_value_vector> : std::true_type {};
+template struct is_value_vector> : std::true_type {};
+template struct is_value_vector> : std::true_type {};
+template struct is_value_vector> : std::true_type {};
 
 /// Allows detection whether specific type is a simdpp mask
 template struct is_mask : std::false_type {};
-template struct is_mask> : std::true_type {};
-template struct is_mask> : std::true_type {};
-template struct is_mask> : std::true_type {};
-template struct is_mask> : std::true_type {};
-template struct is_mask> : std::true_type {};
-template struct is_mask> : std::true_type {};
+template struct is_mask> : std::true_type {};
+template struct is_mask> : std::true_type {};
+template struct is_mask> : std::true_type {};
+template struct is_mask> : std::true_type {};
+template struct is_mask> : std::true_type {};
+template struct is_mask> : std::true_type {};
+
+
+/// Define simd_traits
+/// The primary template only provides alignment (taken from std::alignment_of).
+/// Each specialization below additionally provides fast_size, simd_type and
+/// simd_mask_type, and sets alignment to fast_size times 1, 2, 4 or 8.
+/// NOTE(review): the factor presumably is the element size in bytes and each
+/// specialization presumably targets the scalar type matching its simd_type - confirm.
+template
+struct simd_traits
+{
+    static const size_t alignment = std::alignment_of::value;
+};
+
+template<>
+struct simd_traits
+{
+    static const size_t fast_size = SIMDPP_FAST_INT8_SIZE;
+    using simd_type = int8;
+    using simd_mask_type = mask_int8;
+    static const size_t alignment = fast_size;
+};
+
+template<>
+struct simd_traits
+{
+    static const size_t fast_size = SIMDPP_FAST_INT8_SIZE;
+    using simd_type = uint8;
+    using simd_mask_type = mask_int8;
+    static const size_t alignment = fast_size;
+};
+
+template<>
+struct simd_traits
+{
+    static const size_t fast_size = SIMDPP_FAST_INT16_SIZE;
+    using simd_type = int16;
+    using simd_mask_type = mask_int16;
+    static const size_t alignment = fast_size * 2;
+};
+
+template<>
+struct simd_traits
+{
+    static const size_t fast_size = SIMDPP_FAST_INT16_SIZE;
+    using simd_type = uint16;
+    using simd_mask_type = mask_int16;
+    static const size_t alignment = fast_size * 2;
+};
+
+template<>
+struct simd_traits
+{
+    static const size_t fast_size = SIMDPP_FAST_INT32_SIZE;
+    using simd_type = int32;
+    using simd_mask_type = mask_int32;
+    static const size_t alignment = fast_size * 4;
+};
+
+template<>
+struct simd_traits
+{
+    static const size_t fast_size = SIMDPP_FAST_INT32_SIZE;
+    using simd_type = uint32;
+    using simd_mask_type = mask_int32;
+    static const size_t alignment = fast_size * 4;
+};
+
+template<>
+struct simd_traits
+{
+    static const size_t fast_size = SIMDPP_FAST_INT64_SIZE;
+    using simd_type = int64;
+    using simd_mask_type = mask_int64;
+    static const size_t alignment = fast_size * 8;
+};
+
+template<>
+struct simd_traits
+{
+    static const size_t fast_size = SIMDPP_FAST_INT64_SIZE;
+    using simd_type = uint64;
+    using simd_mask_type = mask_int64;
+    static const size_t alignment = fast_size * 8;
+};
+
+template<>
+struct simd_traits
+{
+    static const size_t fast_size = SIMDPP_FAST_FLOAT32_SIZE;
+    using simd_type = float32;
+    using simd_mask_type = mask_float32;
+    static const size_t alignment = fast_size * 4;
+};
+
+
+template<>
+struct simd_traits
+{
+    static const size_t fast_size = SIMDPP_FAST_FLOAT64_SIZE;
+    using simd_type = float64;
+    using simd_mask_type = mask_float64;
+    static const size_t alignment = fast_size * 8;
+};
 
 } // namespace SIMDPP_ARCH_NAMESPACE
 } // namespace simdpp
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index ecf2fc6f..a000282f 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -58,23 +58,45 @@ set(TEST_INSN_HEADERS
 )
 
 set(TEST_INSN_ARCH_SOURCES
+    insn/all_of.cc
+    insn/any_of.cc
 insn/bitwise.cc
 insn/blend.cc
 insn/compare.cc
 insn/construct.cc
 insn/convert.cc
+    insn/copy.cc
+    insn/copy_n.cc
+
insn/count.cc
+    insn/count_if.cc
+    insn/equal.cc
 insn/for_each.cc
+    insn/fill.cc
+    insn/find.cc
+    insn/find_if.cc
+    insn/find_if_not.cc
+    insn/lexicographical_compare.cc
 insn/math_fp.cc
 insn/math_int.cc
 insn/math_shift.cc
+    insn/max.cc
+    insn/max_element.cc
 insn/memory_load.cc
 insn/memory_store.cc
+    insn/min.cc
+    insn/min_element.cc
+    insn/none_of.cc
 insn/shuffle.cc
 insn/shuffle_bytes.cc
 insn/permute_generic.cc
+    insn/reduce.cc
+    insn/replace.cc
+    insn/replace_if.cc
 insn/shuffle_generic.cc
 insn/test_utils.cc
 insn/tests.cc
+    insn/transform.cc
+    insn/transform_reduce.cc
 insn/transpose.cc
 )
 
diff --git a/test/insn/all_of.cc b/test/insn/all_of.cc
new file mode 100644
index 00000000..e3f1bdbe
--- /dev/null
+++ b/test/insn/all_of.cc
@@ -0,0 +1,134 @@
+/* Copyright (C) 2018 Povilas Kanapickas
+    Copyright (C) 2018 Thomas Retornaz
+
+    Distributed under the Boost Software License, Version 1.0.
+    (See accompanying file LICENSE_1_0.txt or copy at
+    http://www.boost.org/LICENSE_1_0.txt)
+*/
+
+#include "../utils/test_helpers.h"
+#include "../utils/test_results.h"
+#include
+#include
+#include
+#include
+//algorithm
+#include
+
+namespace SIMDPP_ARCH_NAMESPACE {
+
+//Predicate testing equality against a fixed value.
+//Provides a scalar overload returning bool and a simd overload returning a mask (cmp_eq).
+template
+    struct UnaryPredicateEqualValue
+{
+public:
+    UnaryPredicateEqualValue(T val) :m_val(val), m_val_simd(simdpp::splat(val)) {}
+    using simd_mask_T = typename simdpp::simd_traits::simd_mask_type;
+    using simd_type_T = typename simdpp::simd_traits::simd_type;
+
+    SIMDPP_INL bool operator()(T a) const SIMDPP_NOEXCEPT { return a == m_val; }
+    SIMDPP_INL simd_mask_T operator()(const simd_type_T& a) const SIMDPP_NOEXCEPT { return cmp_eq(a, m_val_simd); }
+
+    T m_val;
+    simd_type_T m_val_simd;
+};
+
+
+
+//Compares simdpp::all_of against std::all_of on generated inputs of each requested size.
+//The generator is constructed with 10; predEqualTen and predEqualFive are the
+//cases labelled "predicate match" and "predicate fail" below.
+template
+struct AllOffFuzzingTest
+{
+    AllOffFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator(10) {}
+    void operator()(TestReporter& tr)
+    {
+        const auto predEqualTen = UnaryPredicateEqualValue((T)10);
+        const auto predEqualFive = UnaryPredicateEqualValue((T)5);
+        for (auto size : m_sizes)
+        {
+            {//aligned input/output predicate match
+                const auto input(DataGeneratorAligned>(size, m_generator));
+                auto res_std=std::all_of(input.cbegin(), input.cend(), predEqualTen);
+                auto res_simd=simdpp::all_of(input.data(), input.data() + input.size(),predEqualTen);
+                TEST_EQUAL(tr, res_std, res_simd);
+            }
+            {//non aligned input/output predicate match
+                const auto input(DataGenerator>(size, m_generator));
+                auto res_std=std::all_of(input.cbegin(), input.cend(), predEqualTen);
+                auto res_simd = simdpp::all_of(input.data(), input.data() + input.size(), predEqualTen);
+                TEST_EQUAL(tr, res_std, res_simd);
+            }
+
+            {//aligned input/output predicate fail
+                const auto input(DataGeneratorAligned>(size, m_generator));
+                auto res_std=std::all_of(input.cbegin(), input.cend(), predEqualFive);
+                auto res_simd = simdpp::all_of(input.data(), input.data() + input.size(), predEqualFive);
+                TEST_EQUAL(tr, res_std, res_simd);
+            }
+            {//non aligned input/output predicate fail
+                const auto input(DataGenerator>(size, m_generator));
+                auto res_std=std::all_of(input.cbegin(), input.cend(), predEqualFive);
+                auto res_simd = simdpp::all_of(input.data(), input.data() + input.size(), predEqualFive);
+                TEST_EQUAL(tr, res_std, res_simd);
+            }
+        }
+    }
+    std::vector m_sizes;
+    GeneratorConstant m_generator;
+};
+
+//Checks simdpp all_of against std::all_of for one element type:
+//two-element vectors, 50-element aligned vectors, then the fuzzing sizes.
+template
+void test_all_of_type(TestResultsSet& ts, TestReporter& tr)
+{
+    using namespace simdpp;
+    using vector_t = std::vector;
+    using vector_aligned_t = std::vector::alignment>>;
+
+    {//test with predicate
+        const auto predEqualTen = UnaryPredicateEqualValue((T)10);
+        const auto predEqualFive = UnaryPredicateEqualValue((T)5);
+        { //test prologue
+            vector_t ivect = { (T)10,(T)10 };
+            auto res = all_of(ivect.data(), ivect.data() + ivect.size(), predEqualTen);
+            auto resstd = std::all_of(begin(ivect), end(ivect), predEqualTen);
+            TEST_EQUAL(tr, res, resstd);
+        }
+        { //test prologue
+            vector_t ivect = { (T)10,(T)10 };
+            auto res = all_of(ivect.data(), ivect.data() + ivect.size(), predEqualFive);
+            auto resstd = std::all_of(begin(ivect), end(ivect), predEqualFive);
+            TEST_EQUAL(tr, res, resstd);
+        }
+        { //test main loop and epilogue on aligned vector
+            vector_aligned_t ivect(50, (T)10);
+            auto res = all_of(ivect.data(), ivect.data() + ivect.size(), predEqualTen);
+            auto resstd = std::all_of(begin(ivect), end(ivect), predEqualTen);
+            TEST_EQUAL(tr, res, resstd);
+        }
+        { //test main loop and epilogue on aligned vector
+            vector_aligned_t ivect(50, (T)10);
+            auto res = all_of(ivect.data(), ivect.data() + ivect.size(), predEqualFive);
+            auto resstd = std::all_of(begin(ivect), end(ivect), predEqualFive);
+            TEST_EQUAL(tr, res, resstd);
+        }
+        AllOffFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/output
+        fuzzing(tr);
+    }
+
+}
+
+//Entry point: registers the "all_of" result set and runs the typed test once per
+//enabled element type (the first two instantiations are disabled, see FIXME).
+void test_all_of(TestResults& res, TestReporter& tr)
+{
+    using namespace simdpp;
+    TestResultsSet& ts = res.new_results_set("all_of");
+    //test_all_of_type(ts, tr); //FIXME
+    //test_all_of_type(ts, tr); //FIXME
+    test_all_of_type(ts, tr);
+    test_all_of_type(ts, tr);
+    test_all_of_type(ts, tr);
+    test_all_of_type(ts, tr);
+    test_all_of_type(ts, tr);
+    test_all_of_type(ts, tr);
+    test_all_of_type(ts, tr);
+    test_all_of_type(ts, tr);
+}
+
+} // namespace SIMDPP_ARCH_NAMESPACE
diff --git a/test/insn/any_of.cc b/test/insn/any_of.cc
new file mode 100644
index 00000000..4f8f329c
--- /dev/null
+++ b/test/insn/any_of.cc
@@ -0,0 +1,121 @@
+/* Copyright (C) 2018 Povilas Kanapickas
+    Copyright (C) 2018 Thomas Retornaz
+
+    Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include +//algorithm +#include +#include /* srand, rand */ +#include /* time */ + +namespace SIMDPP_ARCH_NAMESPACE { + +template + struct UnaryPredicateEqualValue +{ +public: + UnaryPredicateEqualValue(T val) :m_val(val), m_val_simd(simdpp::splat(val)) {} + using simd_mask_T = typename simdpp::simd_traits::simd_mask_type; + using simd_type_T = typename simdpp::simd_traits::simd_type; + + SIMDPP_INL bool operator()(T a) const SIMDPP_NOEXCEPT { return a == m_val; } + SIMDPP_INL simd_mask_T operator()(const simd_type_T& a) const SIMDPP_NOEXCEPT { return cmp_eq(a, m_val_simd); } + + T m_val; + simd_type_T m_val_simd; +}; + +template +struct AnyOffFuzzingTest +{ + AnyOffFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator(5) {} + void operator()(TestReporter& tr) + { + const auto predEqualTen = UnaryPredicateEqualValue((T)10); + srand((unsigned int)time(nullptr)); + for (auto size : m_sizes) + { + {//aligned input/ouput predicate match + auto input(DataGeneratorAligned>(size, m_generator)); + input[(size_t)(rand() % input.size())]=10; + auto res_std = std::any_of(input.cbegin(), input.cend(), predEqualTen); + auto res_simd = simdpp::any_of(input.data(), input.data() + input.size(), predEqualTen); + TEST_EQUAL(tr, res_std, res_simd); + } + {//non aligned input/ouput predicate match + auto input(DataGenerator>(size, m_generator)); + input[(size_t)(rand() % input.size())] = 10; + auto res_std = std::any_of(input.cbegin(), input.cend(), predEqualTen); + auto res_simd = simdpp::any_of(input.data(), input.data() + input.size(), predEqualTen); + TEST_EQUAL(tr, res_std, res_simd); + } + + {//aligned input/ouput predicate fail + auto input(DataGeneratorAligned>(size, m_generator)); + auto res_std = std::any_of(input.cbegin(), input.cend(), predEqualTen); + auto 
res_simd = simdpp::any_of(input.data(), input.data() + input.size(), predEqualTen); + TEST_EQUAL(tr, res_std, res_simd); + } + {//non aligned input/ouput predicate fail + auto input(DataGenerator>(size, m_generator)); + auto res_std = std::any_of(input.cbegin(), input.cend(), predEqualTen); + auto res_simd = simdpp::any_of(input.data(), input.data() + input.size(), predEqualTen); + TEST_EQUAL(tr, res_std, res_simd); + } + } + } + std::vector m_sizes; + GeneratorConstant m_generator; +}; + +template + void test_any_of_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + + {//test with predicate + const auto predEqualTen = UnaryPredicateEqualValue((T)10); + { //test prologue + vector_t ivect = { (T)1,(T)10 }; + auto res = any_of(ivect.data(), ivect.data() + ivect.size(), predEqualTen); + auto resstd = std::any_of(begin(ivect), end(ivect), predEqualTen); + TEST_EQUAL(tr, res, resstd); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(50, (T)5); + auto res = any_of(ivect.data(), ivect.data() + ivect.size(), predEqualTen); + auto resstd = std::any_of(begin(ivect), end(ivect), predEqualTen); + TEST_EQUAL(tr, res, resstd); + } + } + AnyOffFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/ouput + fuzzing(tr); +} + +void test_any_of(TestResults& res, TestReporter& tr) +{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("any_of"); + //test_any_of_type(ts, tr); //FIXME + //test_any_of_type(ts, tr); //FIXME + test_any_of_type(ts, tr); + test_any_of_type(ts, tr); + test_any_of_type(ts, tr); + test_any_of_type(ts, tr); + test_any_of_type(ts, tr); + test_any_of_type(ts, tr); + test_any_of_type(ts, tr); + test_any_of_type(ts, tr); +} + +} // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/copy.cc b/test/insn/copy.cc new file mode 100644 index 00000000..fb032f96 --- /dev/null +++ 
b/test/insn/copy.cc @@ -0,0 +1,109 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + +Distributed under the Boost Software License, Version 1.0. +(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include +//algorithm +#include +#include + +namespace SIMDPP_ARCH_NAMESPACE { + + +template +struct CopyFuzzingTest +{ + CopyFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator() {} + void operator()(TestReporter& tr) + { + for (auto size : m_sizes) + { + {//aligned input/ouput + auto input(DataGeneratorAligned>(size, m_generator)); + std::vector::alignment>> expected(size); + std::vector::alignment>> output(size); + std::copy(input.cbegin(), input.cend(),begin(expected)); + simdpp::copy(input.data(), input.data()+input.size(), output.data()); + TEST_EQUAL_COLLECTIONS(tr,output,expected); + } + {//unaligned input/ouput + auto input(DataGenerator>(size, m_generator)); + std::vector expected(size); + std::vector output(size); + std::copy(input.cbegin(), input.cend(), begin(expected)); + simdpp::copy(input.data(), input.data() + input.size(), output.data()); + TEST_EQUAL_COLLECTIONS(tr, output, expected); + } + } + } + std::vector m_sizes; + GeneratorRandom m_generator; +}; + +template +void test_copy_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + { //test prologue + vector_t ivect = { (T)42,(T)42 }; + vector_t ovect = { (T)0,(T)0 }; + + copy(ivect.data(), ivect.data() + ivect.size(), ovect.data()); + TEST_EQUAL_COLLECTIONS(tr, ivect, ovect); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(150, (T)42); + vector_t ovect(150, (T)0); + + copy(ivect.data(), ivect.data() + ivect.size(), ovect.data()); + TEST_EQUAL_COLLECTIONS(tr, ivect, ovect); + 
} + { //test main loop and epilogue on range + vector_aligned_t ivect(150, (T)42); + vector_t ovect(150, (T)0); + + copy(ivect.data() + 10u, ivect.data() + ivect.size() - 10u, ovect.data()+10u); + for (auto i = 0u; i < 10u; ++i) + { + TEST_EQUAL(tr, ovect[i], (T)0); + } + for (auto i = 10; i < ovect.size() - 10u; ++i) + { + TEST_EQUAL(tr, ovect[i], ivect[i]); + } + for (auto i = ovect.size() - 10u; i < ovect.size(); ++i) + { + TEST_EQUAL(tr, ovect[i], (T)0); + } + } + CopyFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/ouput + fuzzing(tr); +} + +void test_copy(TestResults& res, TestReporter& tr) +{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("copy"); + test_copy_type(ts, tr); + test_copy_type(ts, tr); + test_copy_type(ts, tr); + test_copy_type(ts, tr); + test_copy_type(ts, tr); + test_copy_type(ts, tr); + test_copy_type(ts, tr); + test_copy_type(ts, tr); + test_copy_type(ts, tr); + test_copy_type(ts, tr); +} + +} // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/copy_n.cc b/test/insn/copy_n.cc new file mode 100644 index 00000000..7cd905d8 --- /dev/null +++ b/test/insn/copy_n.cc @@ -0,0 +1,75 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + +Distributed under the Boost Software License, Version 1.0. 
+(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include +//algorithm +#include + +namespace SIMDPP_ARCH_NAMESPACE { + + +template +void test_copy_n_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + { //test prologue + vector_t ivect = { (T)42,(T)42 }; + vector_t ovect = { (T)0,(T)0 }; + + copy_n(ivect.data(),2, ovect.data()); + TEST_EQUAL_COLLECTIONS(tr, ivect, ovect); + } + { //test negative don't change ovect + vector_t ivect = { (T)42,(T)42 }; + vector_t ovect = { (T)0,(T)0 }; + + copy_n(ivect.data(), -2, ovect.data()); + + TEST_EQUAL(tr, (T)0, ovect[0]); + TEST_EQUAL(tr, (T)0, ovect[1]); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(150, (T)42); + vector_t ovect(150, (T)0); + + copy_n(ivect.data(),100, ovect.data()); + for (auto i = 0; i < 100; ++i) + { + TEST_EQUAL(tr, ovect[i], ivect[i]); + } + for (auto i = 100; i (ts, tr); + test_copy_n_type(ts, tr); + test_copy_n_type(ts, tr); + test_copy_n_type(ts, tr); + test_copy_n_type(ts, tr); + test_copy_n_type(ts, tr); + test_copy_n_type(ts, tr); + test_copy_n_type(ts, tr); + test_copy_n_type(ts, tr); + test_copy_n_type(ts, tr); +} + +} // namespace SIMDPP_ARCH_NAMESPACE \ No newline at end of file diff --git a/test/insn/count.cc b/test/insn/count.cc new file mode 100644 index 00000000..124802ae --- /dev/null +++ b/test/insn/count.cc @@ -0,0 +1,89 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + +Distributed under the Boost Software License, Version 1.0. 
+(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include +//algorithm +#include + +namespace SIMDPP_ARCH_NAMESPACE { + + +template +struct CountFuzzingTest +{ + CountFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator(42) {} + void operator()(TestReporter& tr) + { + for (auto size : m_sizes) + { + {//aligned input/ouput + auto input(DataGeneratorAligned>(size, m_generator)); + input[(input.size()-1)%2]=(T)0; + auto res_std=std::count(input.cbegin(), input.cend(),(T)42); + auto res_simd=simdpp::count(input.data(), input.data() + input.size(), (T)42); + TEST_EQUAL(tr, res_std, res_simd); + } + {//unaligned input/ouput + auto input(DataGenerator>(size, m_generator)); + input[(input.size()-1) % 2] = (T)0; + auto res_std = std::count(input.cbegin(), input.cend(), (T)42); + auto res_simd = simdpp::count(input.data(), input.data() + input.size(), (T)42); + TEST_EQUAL(tr, res_std, res_simd); + + } + } + } + std::vector m_sizes; + GeneratorConstant m_generator; +}; + +template +void test_count_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + { //test prologue + vector_t ivect = { (T)42,(T)42 }; + auto res = count(ivect.data(), ivect.data() + ivect.size(), (T)42); + auto resstd = std::count(begin(ivect), end(ivect), (T)42); + TEST_EQUAL(tr, res, resstd); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(50, (T)42); + ivect[25] = (T)0; + ivect[49] = (T)0; + auto res = count(ivect.data(), ivect.data() + ivect.size(), (T)42); + auto resstd = std::count(begin(ivect), end(ivect), (T)42); + TEST_EQUAL(tr, res, resstd); + } + CountFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/ouput + fuzzing(tr); +} + +void test_count(TestResults& res, TestReporter& tr) 
+{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("count"); + test_count_type(ts, tr); + test_count_type(ts, tr); + test_count_type(ts, tr); + test_count_type(ts, tr); + test_count_type(ts, tr); + test_count_type(ts, tr); + test_count_type(ts, tr); + test_count_type(ts, tr); + test_count_type(ts, tr); + test_count_type(ts, tr); +} + +} // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/count_if.cc b/test/insn/count_if.cc new file mode 100644 index 00000000..1adf0093 --- /dev/null +++ b/test/insn/count_if.cc @@ -0,0 +1,106 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + +Distributed under the Boost Software License, Version 1.0. +(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include +#include +//algorithm +#include + +namespace SIMDPP_ARCH_NAMESPACE { + +template +struct UnaryPredicateEqualValue +{ +public: + UnaryPredicateEqualValue(T val) :m_val(val), m_val_simd(simdpp::splat(val)) {} + using simd_mask_T = typename simdpp::simd_traits::simd_mask_type; + using simd_type_T = typename simdpp::simd_traits::simd_type; + + SIMDPP_INL bool operator()(T a) const SIMDPP_NOEXCEPT { return a == m_val; } + SIMDPP_INL simd_mask_T operator()(const simd_type_T& a) const SIMDPP_NOEXCEPT { return cmp_eq(a, m_val_simd); } + + T m_val; + simd_type_T m_val_simd; +}; + +template +struct CountIfFuzzingTest +{ + CountIfFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator(42) {} + void operator()(TestReporter& tr) + { + const auto pred = UnaryPredicateEqualValue((T)42); + for (auto size : m_sizes) + { + {//aligned input/ouput + auto input(DataGeneratorAligned>(size, m_generator)); + input[(input.size() - 1) % 2] = (T)0; + auto res_std = std::count_if(input.cbegin(), input.cend(),pred); + auto res_simd = simdpp::count_if(input.data(), input.data() + input.size(), 
pred); + TEST_EQUAL(tr, res_std, res_simd); + } + {//unaligned input/ouput + auto input(DataGenerator>(size, m_generator)); + input[(input.size() - 1) % 2] = (T)0; + auto res_std = std::count_if(input.cbegin(), input.cend(), pred); + auto res_simd = simdpp::count_if(input.data(), input.data() + input.size(), pred); + TEST_EQUAL(tr, res_std, res_simd); + + } + } + } + std::vector m_sizes; + GeneratorConstant m_generator; +}; + +template +void test_count_if_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + const auto pred = UnaryPredicateEqualValue((T)42); + using vector_aligned_t = std::vector::alignment>>; + { //test prologue + vector_t ivect = { (T)42,(T)42 }; + auto res = count_if(ivect.data(), ivect.data() + ivect.size(), pred); + auto resstd = std::count_if(begin(ivect), end(ivect), pred); + TEST_EQUAL(tr, res, resstd); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(50, (T)42); + ivect[25] = (T)0; + ivect[49] = (T)0; + auto res = count_if(ivect.data(), ivect.data() + ivect.size(), pred); + auto resstd = std::count_if(begin(ivect), end(ivect), pred); + TEST_EQUAL(tr, res, resstd); + } + CountIfFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/ouput + fuzzing(tr); +} + +void test_count_if(TestResults& res, TestReporter& tr) +{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("count_if"); + test_count_if_type(ts, tr); + test_count_if_type(ts, tr); + test_count_if_type(ts, tr); + test_count_if_type(ts, tr); + test_count_if_type(ts, tr); + test_count_if_type(ts, tr); + test_count_if_type(ts, tr); + test_count_if_type(ts, tr); + test_count_if_type(ts, tr); + test_count_if_type(ts, tr); +} + +} // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/equal.cc b/test/insn/equal.cc new file mode 100644 index 00000000..42b74834 --- /dev/null +++ b/test/insn/equal.cc @@ -0,0 +1,92 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright 
(C) 2018 Thomas Retornaz + +Distributed under the Boost Software License, Version 1.0. +(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include +//algorithm +#include + +namespace SIMDPP_ARCH_NAMESPACE { + +template +struct EqualFuzzingTest +{ + EqualFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator(0) {} + void operator()(TestReporter& tr) + { + for (auto size : m_sizes) + { + {//aligned input/ouput + auto input(DataGeneratorAligned>(size, m_generator)); + auto input2(DataGeneratorAligned>(size, m_generator)); + auto res_std = std::equal(input.cbegin(), input.cend(), input2.cbegin()); + auto res_simd = simdpp::equal(input.data(), input.data() + input.size(), input2.data()); + TEST_EQUAL(tr, res_std, res_simd); + } + {//unaligned input/ouput + auto input(DataGenerator>(size, m_generator)); + auto input2(DataGenerator>(size, m_generator)); + auto res_std = std::equal(input.cbegin(), input.cend(),input2.cbegin()); + auto res_simd = simdpp::equal(input.data(), input.data() + input.size(), input2.data()); + TEST_EQUAL(tr, res_std, res_simd); + + } + } + } + std::vector m_sizes; + GeneratorIota m_generator; +}; + +template +void test_equal_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + { //test prologue + vector_t ivect1 = { (T)42,(T)42 }; + vector_t ivect2 = { (T)0,(T)0 }; + + auto res=equal(ivect1.data(), ivect1.data() + ivect1.size(), ivect2.data()); + TEST_EQUAL(tr,res,false); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect1(50); + std::iota(begin(ivect1),end(ivect1),(T)1); + vector_aligned_t ivect2(50); + std::copy(begin(ivect1),end(ivect1),begin(ivect2)); + auto res=equal(ivect1.data(), ivect1.data() + ivect1.size(), ivect2.data()); + TEST_EQUAL(tr,res,true); 
+ ivect2[25]=0; + auto res2=equal(ivect1.data(), ivect1.data() + ivect1.size(), ivect2.data()); + TEST_EQUAL(tr,res2,false); + } + EqualFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/ouput + fuzzing(tr); +} + +void test_equal(TestResults& res, TestReporter& tr) +{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("copy"); + // test_equal_type(ts, tr); //FIXME + // test_equal_type(ts, tr); //FIXME + test_equal_type(ts, tr); + test_equal_type(ts, tr); + test_equal_type(ts, tr); + test_equal_type(ts, tr); + test_equal_type(ts, tr); + test_equal_type(ts, tr); + test_equal_type(ts, tr); + test_equal_type(ts, tr); +} + +} // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/fill.cc b/test/insn/fill.cc new file mode 100644 index 00000000..a69c27a7 --- /dev/null +++ b/test/insn/fill.cc @@ -0,0 +1,96 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + +Distributed under the Boost Software License, Version 1.0. +(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include +#include + +namespace SIMDPP_ARCH_NAMESPACE { + + +template +struct FillFuzzingTest +{ + FillFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator(42) {} + void operator()(TestReporter& tr) + { + for (auto size : m_sizes) + { + {//aligned input/ouput + auto expected(DataGeneratorAligned>(size, m_generator)); + std::vector::alignment>> output(size); + simdpp::fill(output.data(), output.data() + output.size(),(T)42); + TEST_EQUAL_COLLECTIONS(tr, output, expected); + } + {//unaligned input/ouput + auto expected(DataGenerator>(size, m_generator)); + std::vector output(size); + simdpp::fill(output.data(), output.data() + output.size(), (T)42); + TEST_EQUAL_COLLECTIONS(tr, output, expected); + } + } + } + std::vector m_sizes; + GeneratorConstant m_generator; +}; + + +template 
+void test_fill_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + { //test prologue + vector_t ivect = { (T)0,(T)1 }; + vector_t expected = { (T)42,(T)42 }; + + fill(ivect.data(), ivect.data() + ivect.size(), (T)42); + TEST_EQUAL_COLLECTIONS(tr, ivect, expected); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(150, (T)0); + vector_t expected(150, (T)42); + + fill(ivect.data(), ivect.data() + ivect.size(), (T)42); + TEST_EQUAL_COLLECTIONS(tr, ivect, expected); + } + { //test main loop and epilogue on range + vector_aligned_t ivect(150, (T)0); + vector_t expected(150, (T)42); + + fill(ivect.data() + 10u, ivect.data() + ivect.size() - 10u, (T)42); + for (auto i = 10; i < expected.size() - 10u; ++i) + { + TEST_EQUAL(tr, expected[i], ivect[i]); + } + } + FillFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/ouput + fuzzing(tr); +} + +void test_fill(TestResults& res, TestReporter& tr) +{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("fill"); + test_fill_type(ts, tr); + test_fill_type(ts, tr); + test_fill_type(ts, tr); + test_fill_type(ts, tr); + test_fill_type(ts, tr); + test_fill_type(ts, tr); + test_fill_type(ts, tr); + test_fill_type(ts, tr); + test_fill_type(ts, tr); + test_fill_type(ts, tr); +} + +} // namespace SIMDPP_ARCH_NAMESPACE \ No newline at end of file diff --git a/test/insn/find.cc b/test/insn/find.cc new file mode 100644 index 00000000..348dcfaf --- /dev/null +++ b/test/insn/find.cc @@ -0,0 +1,92 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + +Distributed under the Boost Software License, Version 1.0. 
+(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include +#include + +namespace SIMDPP_ARCH_NAMESPACE { + + +template +struct FindFuzzingTest +{ + FindFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator(0) {} + void operator()(TestReporter& tr) + { + for (auto size : m_sizes) + { + const auto val = ((size - 1) / 2)+1; + {//aligned input/ouput + auto input(DataGeneratorAligned>(size, m_generator)); + auto res_std = std::find(input.cbegin(), input.cend(), val); + auto res_simd = simdpp::find(input.data(), input.data() + input.size(),val); + TEST_EQUAL(tr, *res_std, *res_simd); + } + {//unaligned input/ouput + auto input(DataGenerator>(size, m_generator)); + auto res_std = std::find(input.cbegin(), input.cend(), val); + auto res_simd = simdpp::find(input.data(), input.data() + input.size(), val); + TEST_EQUAL(tr, *res_std, *res_simd); + } + } + } + std::vector m_sizes; + GeneratorIota m_generator; +}; + +template +void test_find_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_aligned_t = std::vector::alignment>>; + {//test prologue + vector_aligned_t ivect(5); + std::iota(begin(ivect), end(ivect), T(1)); + auto resstd = std::find(begin(ivect), end(ivect),(T)3); + auto res = find(ivect.data(), ivect.data() + ivect.size(), (T)3); + TEST_EQUAL(tr, *resstd, *res); + } + { //test main loop and epilogue + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), T(1)); + auto resstd = std::find(begin(ivect), end(ivect),(T)98); + auto res = find(ivect.data(), ivect.data() + ivect.size(), (T)98); + TEST_EQUAL(tr, *resstd, *res); + } + { //test main loop + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), T(1)); + auto resstd = std::find(begin(ivect), end(ivect),(T)50); + auto res = find(ivect.data(), ivect.data() + ivect.size(), (T)50); + 
TEST_EQUAL(tr, *resstd, *res); + } + FindFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/ouput + fuzzing(tr); +} + +void test_find(TestResults& res, TestReporter& tr) +{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("find"); + test_find_type(ts, tr); + test_find_type(ts, tr); + // test_find_type(ts, tr); //FIXME + // test_find_type(ts, tr); //FIXME + test_find_type(ts, tr); + test_find_type(ts, tr); + test_find_type(ts, tr); + test_find_type(ts, tr); + test_find_type(ts, tr); + test_find_type(ts, tr); +} + +} // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/find_if.cc b/test/insn/find_if.cc new file mode 100644 index 00000000..dd24d4f2 --- /dev/null +++ b/test/insn/find_if.cc @@ -0,0 +1,111 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + +Distributed under the Boost Software License, Version 1.0. +(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include +#include + +namespace SIMDPP_ARCH_NAMESPACE { + +template +struct UnaryPredicateSupValue +{ +public: + UnaryPredicateSupValue(T val) :m_val(val), m_val_simd(simdpp::splat(val)) {} + using simd_mask_T = typename simdpp::simd_traits::simd_mask_type; + using simd_type_T = typename simdpp::simd_traits::simd_type; + + SIMDPP_INL bool operator()(T a) const SIMDPP_NOEXCEPT { return a > m_val; } + SIMDPP_INL simd_mask_T operator()(const simd_type_T& a) const SIMDPP_NOEXCEPT { return cmp_gt(a, m_val_simd); } + + T m_val; + simd_type_T m_val_simd; +}; + +template +struct FindIfFuzzingTest +{ + FindIfFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator(0) {} + void operator()(TestReporter& tr) + { + + for (auto size : m_sizes) + { + const auto pred = UnaryPredicateSupValue((T)((size - 1) / 2)); + {//aligned input/ouput + auto input(DataGeneratorAligned>(size, m_generator)); 
+ auto res_std = std::find_if(input.cbegin(), input.cend(),pred); + auto res_simd = simdpp::find_if(input.data(), input.data() + input.size(), pred); + TEST_EQUAL(tr, *res_std, *res_simd); + } + {//unaligned input/ouput + auto input(DataGenerator>(size, m_generator)); + auto res_std = std::find_if(input.cbegin(), input.cend(), pred); + auto res_simd = simdpp::find_if(input.data(), input.data() + input.size(), pred); + TEST_EQUAL(tr, *res_std, *res_simd); + + } + } + } + std::vector m_sizes; + GeneratorIota m_generator; +}; + +template +void test_find_if_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_aligned_t = std::vector::alignment>>; + {//test prologue + vector_aligned_t ivect(5); + std::iota(begin(ivect), end(ivect), T(1)); + const auto SupThree = UnaryPredicateSupValue((T)3); + auto resstd = std::find_if(begin(ivect), end(ivect), [](T el) { return el > 3; }); + auto res = find_if(ivect.data(), ivect.data() + ivect.size(), SupThree); + TEST_EQUAL(tr, *resstd, *res); + } + { //test main loop and epilogue + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), T(1)); + const auto predicate = UnaryPredicateSupValue((T)98); + auto resstd = std::find_if(begin(ivect), end(ivect), [](T el) { return el > 98; }); + auto res = find_if(ivect.data(), ivect.data() + ivect.size(), predicate); + TEST_EQUAL(tr, *resstd, *res); + } + { //test main loop + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), T(1)); + const auto predicate = UnaryPredicateSupValue((T)50); + auto resstd = std::find_if(begin(ivect), end(ivect), [](T el) { return el > 50; }); + auto res = find_if(ivect.data(), ivect.data() + ivect.size(), predicate); + TEST_EQUAL(tr, *resstd, *res); + } + FindIfFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/ouput + fuzzing(tr); +} + +void test_find_if(TestResults& res, TestReporter& tr) +{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("find_if"); + 
test_find_if_type(ts, tr); + test_find_if_type(ts, tr); + //test_find_if_type(ts, tr); //FIXME + //test_find_if_type(ts, tr); //FIXME + test_find_if_type(ts, tr); + test_find_if_type(ts, tr); + test_find_if_type(ts, tr); + test_find_if_type(ts, tr); + test_find_if_type(ts, tr); + test_find_if_type(ts, tr); +} + +} // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/find_if_not.cc b/test/insn/find_if_not.cc new file mode 100644 index 00000000..8501cad1 --- /dev/null +++ b/test/insn/find_if_not.cc @@ -0,0 +1,111 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + +Distributed under the Boost Software License, Version 1.0. +(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include +#include + +namespace SIMDPP_ARCH_NAMESPACE { + +template +struct UnaryPredicateInfValue +{ +public: + UnaryPredicateInfValue(T val) :m_val(val), m_val_simd(simdpp::splat(val)) {} + using simd_mask_T = typename simdpp::simd_traits::simd_mask_type; + using simd_type_T = typename simdpp::simd_traits::simd_type; + + SIMDPP_INL bool operator()(T a) const SIMDPP_NOEXCEPT { return a < m_val; } + SIMDPP_INL simd_mask_T operator()(const simd_type_T& a) const SIMDPP_NOEXCEPT { return cmp_lt(a, m_val_simd); } +private: + T m_val; + simd_type_T m_val_simd; +}; + +template +struct FindIfNotFuzzingTest +{ + FindIfNotFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator(0) {} + void operator()(TestReporter& tr) + { + + for (auto size : m_sizes) + { + const auto pred = UnaryPredicateInfValue((T)((size - 1) / 2)); + {//aligned input/ouput + auto input(DataGeneratorAligned>(size, m_generator)); + auto res_std = std::find_if_not(input.cbegin(), input.cend(), pred); + auto res_simd = simdpp::find_if_not(input.data(), input.data() + input.size(), pred); + TEST_EQUAL(tr, *res_std, *res_simd); + } + {//unaligned 
input/ouput + auto input(DataGenerator>(size, m_generator)); + auto res_std = std::find_if_not(input.cbegin(), input.cend(), pred); + auto res_simd = simdpp::find_if_not(input.data(), input.data() + input.size(), pred); + TEST_EQUAL(tr, *res_std, *res_simd); + + } + } + } + std::vector m_sizes; + GeneratorIota m_generator; +}; + +template +void test_find_if_not_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_aligned_t = std::vector::alignment>>; + {//test prologue + vector_aligned_t ivect(5); + std::iota(begin(ivect), end(ivect), T(1)); + const auto InfThree = UnaryPredicateInfValue((T)3); + auto resstd = std::find_if_not(begin(ivect), end(ivect), [](T el) { return el < 3; }); + auto res = find_if_not(ivect.data(), ivect.data() + ivect.size(), InfThree); + TEST_EQUAL(tr, *resstd, *res); + } + { //test main loop and epilogue + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), T(1)); + const auto predicate = UnaryPredicateInfValue((T)98); + auto resstd = std::find_if_not(begin(ivect), end(ivect), [](T el) { return el < 98; }); + auto res = find_if_not(ivect.data(), ivect.data() + ivect.size(), predicate); + TEST_EQUAL(tr, *resstd, *res); + } + { //test main loop + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), T(1)); + const auto predicate = UnaryPredicateInfValue((T)50); + auto resstd = std::find_if_not(begin(ivect), end(ivect), [](T el) { return el < 50; }); + auto res = find_if_not(ivect.data(), ivect.data() + ivect.size(), predicate); + TEST_EQUAL(tr, *resstd, *res); + } + FindIfNotFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/ouput + fuzzing(tr); +} + +void test_find_if_not(TestResults& res, TestReporter& tr) +{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("find_if_not"); + //test_find_if_not_type(ts, tr); //FIXME missing reduce and + //test_find_if_not_type(ts, tr); //FIXME missing reduce and + //test_find_if_not_type(ts, tr); //FIXME 
+ //test_find_if_not_type(ts, tr); //FIXME + test_find_if_not_type(ts, tr); + test_find_if_not_type(ts, tr); + test_find_if_not_type(ts, tr); + test_find_if_not_type(ts, tr); + test_find_if_not_type(ts, tr); + test_find_if_not_type(ts, tr); +} + +} // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/lexicographical_compare.cc b/test/insn/lexicographical_compare.cc new file mode 100644 index 00000000..94b6d972 --- /dev/null +++ b/test/insn/lexicographical_compare.cc @@ -0,0 +1,97 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include +#include +#include +#include + +namespace SIMDPP_ARCH_NAMESPACE { + +template +struct LexicographicalCompareFuzzingTest +{ + LexicographicalCompareFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator1(0), m_generator2(1) {} + void operator()(TestReporter& tr) + { + + for (auto size : m_sizes) + { + {//aligned input/ouput + auto input(DataGeneratorAligned>(size, m_generator1)); + auto input2(DataGeneratorAligned>(size, m_generator2)); + auto res_std = std::lexicographical_compare(input.cbegin(), input.cend(), input2.cbegin(), input2.cend()); + auto res_simd = simdpp::lexicographical_compare(input.data(), input.data() + input.size(), input2.data(), input2.data() + input2.size() ); + TEST_EQUAL(tr, res_std, res_simd); + } + {//unaligned input/ouput + auto input(DataGenerator>(size, m_generator1)); + auto input2(DataGenerator>(size, m_generator2)); + auto res_std = std::lexicographical_compare(input.cbegin(), input.cend(), input2.cbegin(), input2.cend()); + auto res_simd = simdpp::lexicographical_compare(input.data(), input.data() + input.size(), input2.data(), input2.data() + input2.size()); + TEST_EQUAL(tr, res_std, 
res_simd); + } + } + } + std::vector m_sizes; + GeneratorIota m_generator1; + GeneratorIota m_generator2; +}; + +template + void test_lexicograpical_compare_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_aligned_t = std::vector::alignment>>; + { //test prologue + vector_aligned_t ivect = {(T)0,(T)1}; + vector_aligned_t ivect2={(T)1,(T)2}; + auto res = lexicographical_compare(ivect.data(),ivect.data()+ivect.size(),ivect2.data(),ivect2.data()+ivect2.size()); + auto resstd = std::lexicographical_compare(begin(ivect), end(ivect),begin(ivect2), end(ivect2)); + TEST_EQUAL(tr, res, resstd); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(50); + vector_aligned_t ivect2(50); + std::iota(begin(ivect),end(ivect),(T)0); + std::iota(begin(ivect2),end(ivect2),(T)1); + auto res = lexicographical_compare(ivect.data(),ivect.data()+ivect.size(),ivect2.data(),ivect2.data()+ivect2.size()); + auto resstd = std::lexicographical_compare(begin(ivect), end(ivect),begin(ivect2), end(ivect2)); + TEST_EQUAL(tr, res, resstd); + auto resinv = lexicographical_compare(ivect2.data(),ivect2.data()+ivect2.size(),ivect.data(),ivect.data()+ivect.size()); + auto resstdinv = std::lexicographical_compare(begin(ivect2), end(ivect2),begin(ivect), end(ivect)); + TEST_EQUAL(tr, resinv, resstdinv); + auto ressame = lexicographical_compare(ivect.data(),ivect.data()+ivect.size(),ivect.data(),ivect.data()+ivect.size()); + auto resstdsame = std::lexicographical_compare(begin(ivect), end(ivect),begin(ivect), end(ivect)); + TEST_EQUAL(tr, ressame, resstdsame); + } + LexicographicalCompareFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/ouput + fuzzing(tr); +} + +void test_lexicographical_compare(TestResults& res, TestReporter& tr) +{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("lexicographical_compare"); + //test_lexicograpical_compare_type(ts, tr); //FIXME + 
//test_lexicograpical_compare_type(ts, tr); //FIXME + //test_lexicograpical_compare_type(ts, tr); //FIXME + //test_lexicograpical_compare_type(ts, tr); //FIXME + test_lexicograpical_compare_type(ts, tr); + test_lexicograpical_compare_type(ts, tr); + test_lexicograpical_compare_type(ts, tr); + test_lexicograpical_compare_type(ts, tr); + test_lexicograpical_compare_type(ts, tr); + test_lexicograpical_compare_type(ts, tr); +} + +} // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/max.cc b/test/insn/max.cc new file mode 100644 index 00000000..d874e9ee --- /dev/null +++ b/test/insn/max.cc @@ -0,0 +1,154 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + +Distributed under the Boost Software License, Version 1.0. +(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include +#include +#include + +namespace SIMDPP_ARCH_NAMESPACE { + +template +struct binary_cmp_greater +{ +public: + binary_cmp_greater() = default; + using simd_mask_T = typename simdpp::simd_traits::simd_mask_type; + using simd_type_T = typename simdpp::simd_traits::simd_type; + + SIMDPP_INL bool operator()(T a, T b) const SIMDPP_NOEXCEPT { return a > b; } + + SIMDPP_INL simd_mask_T operator()(const simd_type_T& a, const simd_type_T& b) const SIMDPP_NOEXCEPT { return simdpp::cmp_gt(a, b); } +}; + +template +typename Container::value_type ExtractMaxFromContainer(const Container& cont) +{ + typedef typename Container::value_type value_type; + value_type current = std::numeric_limits::lowest(); + auto it=cont.cbegin(),itend=cont.cend(); + for (; it != itend; ++it) + { + current=std::max(current,*it); + } + return current; +} +template +struct MaxFuzzingTest +{ + MaxFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator() {} + void operator()(TestReporter& tr) + { + auto cmpOPGreater = binary_cmp_greater(); + for 
(auto size : m_sizes) + { + {//aligned input/ouput + auto input(DataGeneratorAligned>(size, m_generator)); + auto res=simdpp::max(input.data(), input.data() + input.size()); + TEST_EQUAL(tr, res, ExtractMaxFromContainer(input)); + } + {//unaligned input/ouput + auto input(DataGenerator>(size, m_generator)); + auto res = simdpp::max(input.data(), input.data() + input.size()); + TEST_EQUAL(tr, res, ExtractMaxFromContainer(input)); + } + {//aligned input/ouput + predicate + auto input(DataGeneratorAligned>(size, m_generator)); + auto res = simdpp::max(input.data(), input.data() + input.size(), cmpOPGreater); + TEST_EQUAL(tr, res, ExtractMaxFromContainer(input)); + } + {//unaligned input/ouput + predicate + auto input(DataGenerator>(size, m_generator)); + auto res = simdpp::max(input.data(), input.data() + input.size(), cmpOPGreater); + TEST_EQUAL(tr, res, ExtractMaxFromContainer(input)); + } + } + } + std::vector m_sizes; + GeneratorRandom m_generator; +}; +template +void test_max_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + auto cmpOPGreater = binary_cmp_greater(); + {//test classical max + { //test prologue + vector_aligned_t ivect(5); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[0] = { (T)127 }; + TEST_EQUAL(tr, (T)127, max(ivect.data(), ivect.data() + ivect.size())); + } + { //test epilogue + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[99] = { (T)127 }; + TEST_EQUAL(tr, (T)127, max(ivect.data(), ivect.data() + ivect.size())); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[50] = { (T)127 }; + TEST_EQUAL(tr, (T)127, max(ivect.data(), ivect.data() + ivect.size())); + } + {// test first==last + vector_aligned_t ivect(5); + TEST_EQUAL(tr, std::numeric_limits::lowest(), max(ivect.data() + ivect.size(), ivect.data() + 
ivect.size())); + } + } + {//test max with comp op + { //test prologue + vector_aligned_t ivect(5); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[0] = { (T)127 }; + TEST_EQUAL(tr, (T)127, max(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); + } + { //test epilogue + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[99] = { (T)127 }; + TEST_EQUAL(tr, (T)127, max(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[50] = { (T)127 }; + TEST_EQUAL(tr, (T)127, max(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); + } + {// test first==last + vector_aligned_t ivect(5); + TEST_EQUAL(tr, std::numeric_limits::lowest(), max(ivect.data() + ivect.size(), ivect.data() + ivect.size(), cmpOPGreater)); + } + } + MaxFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144});//0 generate null ptr inputs/ouput + fuzzing(tr); +} + +void test_max(TestResults& res, TestReporter& tr) +{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("max"); + + test_max_type(ts, tr); + test_max_type(ts, tr); + //test_max_type(ts, tr); //FIXME + //test_max_type(ts, tr); //FIXME + test_max_type(ts, tr); + test_max_type(ts, tr); + test_max_type(ts, tr); + test_max_type(ts, tr); + test_max_type(ts, tr); + test_max_type(ts, tr); +} + +} // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/max_element.cc b/test/insn/max_element.cc new file mode 100644 index 00000000..6d770887 --- /dev/null +++ b/test/insn/max_element.cc @@ -0,0 +1,140 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + +Distributed under the Boost Software License, Version 1.0. 
+(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include +#include +#include + +namespace SIMDPP_ARCH_NAMESPACE { + +template +struct binary_cmp_greater +{ +public: + binary_cmp_greater() = default; + using simd_mask_T = typename simdpp::simd_traits::simd_mask_type; + using simd_type_T = typename simdpp::simd_traits::simd_type; + + SIMDPP_INL bool operator()(T a, T b) const SIMDPP_NOEXCEPT { return a < b; } + + SIMDPP_INL simd_mask_T operator()(const simd_type_T& a, const simd_type_T& b) const SIMDPP_NOEXCEPT { return simdpp::cmp_lt(a, b); } +}; + +template +struct MaxElementFuzzingTest +{ + MaxElementFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator() {} + void operator()(TestReporter& tr) + { + auto cmpOPGreater = binary_cmp_greater(); + + for (auto size : m_sizes) + { + {//aligned input/ouput + auto input(DataGeneratorAligned>(size, m_generator)); + auto res_std = std::max_element(input.cbegin(), input.cend()); + auto res_simd = simdpp::max_element(input.data(), input.data() + input.size()); + TEST_EQUAL(tr, *res_std,*res_simd); + } + {//unaligned input/ouput + auto input(DataGenerator>(size, m_generator)); + auto res_std = std::max_element(input.cbegin(), input.cend()); + auto res_simd = simdpp::max_element(input.data(), input.data() + input.size()); + TEST_EQUAL(tr, *res_std, *res_simd); + + } + {//aligned input/ouput + predicate + auto input(DataGeneratorAligned>(size, m_generator)); + auto res_std = std::max_element(input.cbegin(), input.cend(), cmpOPGreater); + auto res_simd = simdpp::max_element(input.data(), input.data() + input.size(), cmpOPGreater); + TEST_EQUAL(tr, *res_std, *res_simd); + } + {//unaligned input/ouput + predicate + auto input(DataGenerator>(size, m_generator)); + auto res_std = std::max_element(input.cbegin(), input.cend(), cmpOPGreater); + auto res_simd = 
simdpp::max_element(input.data(), input.data() + input.size(), cmpOPGreater); + TEST_EQUAL(tr, *res_std, *res_simd); + } + } + } + std::vector m_sizes; + GeneratorRandom m_generator; +}; + +template +void test_max_element_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + auto cmpOPGreater = binary_cmp_greater(); + {//test classical max + { //test prologue + vector_aligned_t ivect(5); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[0] = { (T)127 }; + TEST_EQUAL(tr, (T)127, *max_element(ivect.data(), ivect.data() + ivect.size())); + } + { //test epilogue + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[99] = { (T)127 }; + TEST_EQUAL(tr, (T)127, *max_element(ivect.data(), ivect.data() + ivect.size())); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[50] = { (T)127 }; + TEST_EQUAL(tr, (T)127, *max_element(ivect.data(), ivect.data() + ivect.size())); + } + } + {//test max with comp op + { //test prologue + vector_aligned_t ivect(5); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[0] = { (T)127 }; + TEST_EQUAL(tr, (T)127, *max_element(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); + } + { //test epilogue + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[99] = { (T)127 }; + TEST_EQUAL(tr, (T)127, *max_element(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[50] = { (T)127 }; + TEST_EQUAL(tr, (T)127, *max_element(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); + } + } + MaxElementFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144});//0 generate null ptr inputs/ouput + fuzzing(tr); +} + +void test_max_element(TestResults& res, 
TestReporter& tr) +{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("max_element"); + test_max_element_type(ts, tr); + test_max_element_type(ts, tr); + //test_max_type(ts, tr); //FIXME + //test_max_type(ts, tr); //FIXME + test_max_element_type(ts, tr); + test_max_element_type(ts, tr); + test_max_element_type(ts, tr); + test_max_element_type(ts, tr); + test_max_element_type(ts, tr); + test_max_element_type(ts, tr); +} + +} // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/min.cc b/test/insn/min.cc new file mode 100644 index 00000000..34ef7662 --- /dev/null +++ b/test/insn/min.cc @@ -0,0 +1,158 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + +Distributed under the Boost Software License, Version 1.0. +(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include +#include +#include + +namespace SIMDPP_ARCH_NAMESPACE { + +template +struct binary_cmp_greater +{ +public: + binary_cmp_greater() = default; + using simd_mask_T = typename simdpp::simd_traits::simd_mask_type; + using simd_type_T = typename simdpp::simd_traits::simd_type; + + SIMDPP_INL bool operator()(T a, T b) const SIMDPP_NOEXCEPT { return a > b; } + + SIMDPP_INL simd_mask_T operator()(const simd_type_T& a, const simd_type_T& b) const SIMDPP_NOEXCEPT { return simdpp::cmp_gt(a, b); } +}; + +template +typename Container::value_type ExtractMinFromContainer(const Container& cont) +{ + typedef typename Container::value_type value_type; + value_type current = std::numeric_limits::max(); + auto it = cont.cbegin(), itend = cont.cend(); + for (; it != itend; ++it) + { + current = std::min(current, *it); + } + return current; +} + +template +struct MinFuzzingTest +{ + MinFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator() {} + void operator()(TestReporter& tr) + { + auto cmpOPGreater = 
binary_cmp_greater(); + for (auto size : m_sizes) + { + {//aligned input/ouput + auto input(DataGeneratorAligned>(size, m_generator)); + std::reverse(std::begin(input), std::end(input)); + auto res = simdpp::min(input.data(), input.data() + input.size()); + TEST_EQUAL(tr, res, ExtractMinFromContainer(input)); + } + {//unaligned input/ouput + auto input(DataGenerator>(size, m_generator)); + std::reverse(std::begin(input), std::end(input)); + auto res = simdpp::min(input.data(), input.data() + input.size()); + TEST_EQUAL(tr, res, ExtractMinFromContainer(input)); + } + {//aligned input/ouput + predicate + auto input(DataGeneratorAligned>(size, m_generator)); + std::reverse(std::begin(input), std::end(input)); + auto res = simdpp::min(input.data(), input.data() + input.size(), cmpOPGreater); + TEST_EQUAL(tr, res, ExtractMinFromContainer(input)); + } + {//unaligned input/ouput + predicate + auto input(DataGenerator>(size, m_generator)); + std::reverse(std::begin(input), std::end(input)); + auto res = simdpp::min(input.data(), input.data() + input.size(), cmpOPGreater); + TEST_EQUAL(tr, res, ExtractMinFromContainer(input)); + } + } + } + std::vector m_sizes; + GeneratorRandom m_generator; +}; +template +void test_min_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + auto cmpOPGreater = binary_cmp_greater(); + {//test classical min + { //test prologue + vector_aligned_t ivect(5); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[0] = { (T)0 }; + TEST_EQUAL(tr, (T)0, min(ivect.data(), ivect.data() + ivect.size())); + } + { //test epilogue + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[99] = { (T)0 }; + TEST_EQUAL(tr, (T)0, min(ivect.data(), ivect.data() + ivect.size())); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[50] = { (T)0 }; + 
TEST_EQUAL(tr, (T)0, min(ivect.data(), ivect.data() + ivect.size())); + } + {// test first==last + vector_aligned_t ivect(5); + TEST_EQUAL(tr, std::numeric_limits::max(), min(ivect.data() + ivect.size(), ivect.data() + ivect.size())); + } + } + {//test min with comp op + { //test prologue + vector_aligned_t ivect(5); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[0] = { (T)0 }; + TEST_EQUAL(tr, (T)0, min(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); + } + { //test epilogue + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[99] = { (T)0 }; + TEST_EQUAL(tr, (T)0, min(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[50] = { (T)0 }; + TEST_EQUAL(tr, (T)0, min(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); + } + {// test first==last + vector_aligned_t ivect(5); + TEST_EQUAL(tr, std::numeric_limits::max(), min(ivect.data() + ivect.size(), ivect.data() + ivect.size(), cmpOPGreater)); + } + } + MinFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/ouput + fuzzing(tr); +} + +void test_min(TestResults& res, TestReporter& tr) +{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("min"); + test_min_type(ts, tr); + test_min_type(ts, tr); + //test_min_type(ts, tr); //FIXME + //test_min_type(ts, tr); //FIXME + test_min_type(ts, tr); + test_min_type(ts, tr); + test_min_type(ts, tr); + test_min_type(ts, tr); + test_min_type(ts, tr); + test_min_type(ts, tr); +} + +} // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/min_element.cc b/test/insn/min_element.cc new file mode 100644 index 00000000..f2a4d8ee --- /dev/null +++ b/test/insn/min_element.cc @@ -0,0 +1,144 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + +Distributed under the Boost Software License, Version 1.0. 
+(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include +#include +#include + +namespace SIMDPP_ARCH_NAMESPACE { + +template +struct binary_cmp_greater +{ +public: + binary_cmp_greater() = default; + using simd_mask_T = typename simdpp::simd_traits::simd_mask_type; + using simd_type_T = typename simdpp::simd_traits::simd_type; + + SIMDPP_INL bool operator()(T a, T b) const SIMDPP_NOEXCEPT { return a < b; } + + SIMDPP_INL simd_mask_T operator()(const simd_type_T& a, const simd_type_T& b) const SIMDPP_NOEXCEPT { return simdpp::cmp_lt(a, b); } +}; + +template +struct MinElementFuzzingTest +{ + MinElementFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator() {} + void operator()(TestReporter& tr) + { + auto cmpOPGreater = binary_cmp_greater(); + for (auto size : m_sizes) + { + {//aligned input/ouput + auto input(DataGeneratorAligned>(size, m_generator)); + std::reverse(std::begin(input), std::end(input)); + auto res_std = std::min_element(input.cbegin(), input.cend()); + auto res_simd = simdpp::min_element(input.data(), input.data() + input.size()); + TEST_EQUAL(tr, *res_std, *res_simd); + } + {//unaligned input/ouput + auto input(DataGenerator>(size, m_generator)); + std::reverse(std::begin(input), std::end(input)); + auto res_std = std::min_element(input.cbegin(), input.cend()); + auto res_simd = simdpp::min_element(input.data(), input.data() + input.size()); + TEST_EQUAL(tr, *res_std, *res_simd); + + } + {//aligned input/ouput + predicate + auto input(DataGeneratorAligned>(size, m_generator)); + std::reverse(std::begin(input), std::end(input)); + auto res_std = std::min_element(input.cbegin(), input.cend(), cmpOPGreater); + auto res_simd = simdpp::min_element(input.data(), input.data() + input.size(), cmpOPGreater); + TEST_EQUAL(tr, *res_std, *res_simd); + + } + {//unaligned input/ouput + predicate + auto 
input(DataGenerator>(size, m_generator)); + std::reverse(std::begin(input), std::end(input)); + auto res_std = std::min_element(input.cbegin(), input.cend(), cmpOPGreater); + auto res_simd = simdpp::min_element(input.data(), input.data() + input.size(), cmpOPGreater); + TEST_EQUAL(tr, *res_std, *res_simd); + } + } + } + std::vector m_sizes; + GeneratorRandom m_generator; +}; + +template +void test_min_element_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + auto cmpOPGreater = binary_cmp_greater(); + {//test classical max + { //test prologue + vector_aligned_t ivect(5); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[0] = { (T)0 }; + TEST_EQUAL(tr, (T)0, *min_element(ivect.data(), ivect.data() + ivect.size())); + } + { //test epilogue + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[99] = { (T)0 }; + TEST_EQUAL(tr, (T)0, *min_element(ivect.data(), ivect.data() + ivect.size())); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[50] = { (T)0 }; + TEST_EQUAL(tr, (T)0, *min_element(ivect.data(), ivect.data() + ivect.size())); + } + } + {//test max with comp op + { //test prologue + vector_aligned_t ivect(5); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[0] = { (T)0 }; + TEST_EQUAL(tr, (T)0, *min_element(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); + } + { //test epilogue + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[99] = { (T)0 }; + TEST_EQUAL(tr, (T)0, *min_element(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[50] = { (T)0 }; + TEST_EQUAL(tr, (T)0, *min_element(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); + } 
+ } + MinElementFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/ouput + fuzzing(tr); +} + +void test_min_element(TestResults& res, TestReporter& tr) +{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("min_element"); + test_min_element_type(ts, tr); + test_min_element_type(ts, tr); + //test_max_type(ts, tr); //FIXME + //test_max_type(ts, tr); //FIXME + test_min_element_type(ts, tr); + test_min_element_type(ts, tr); + test_min_element_type(ts, tr); + test_min_element_type(ts, tr); + test_min_element_type(ts, tr); + test_min_element_type(ts, tr); +} + +} // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/none_of.cc b/test/insn/none_of.cc new file mode 100644 index 00000000..e9869c66 --- /dev/null +++ b/test/insn/none_of.cc @@ -0,0 +1,121 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include +#include +#include /* srand, rand */ +#include /* time */ + +namespace SIMDPP_ARCH_NAMESPACE { + +template + struct UnaryPredicateEqualValue +{ +public: + UnaryPredicateEqualValue(T val) :m_val(val), m_val_simd(simdpp::splat(val)) {} + using simd_mask_T = typename simdpp::simd_traits::simd_mask_type; + using simd_type_T = typename simdpp::simd_traits::simd_type; + + SIMDPP_INL bool operator()(T a) const SIMDPP_NOEXCEPT { return a == m_val; } + SIMDPP_INL simd_mask_T operator()(const simd_type_T& a) const SIMDPP_NOEXCEPT { return cmp_eq(a, m_val_simd); } + + T m_val; + simd_type_T m_val_simd; +}; + +template +struct NoneOffFuzzingTest +{ + NoneOffFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator(5) {} + void operator()(TestReporter& tr) + { + const auto predEqualTen = UnaryPredicateEqualValue((T)10); + srand((unsigned 
int)time(nullptr)); + for (auto size : m_sizes) + { + {//aligned input/ouput + auto input(DataGeneratorAligned>(size, m_generator)); + input[(size_t)(rand() % input.size())] = 10; + auto res_std = std::none_of(input.cbegin(), input.cend(), predEqualTen); + auto res_simd = simdpp::none_of(input.data(), input.data() + input.size(), predEqualTen); + TEST_EQUAL(tr, res_std, res_simd); + } + {//non aligned input/ouput + auto input(DataGenerator>(size, m_generator)); + input[(size_t)(rand() % input.size())] = 10; + auto res_std = std::none_of(input.cbegin(), input.cend(), predEqualTen); + auto res_simd = simdpp::none_of(input.data(), input.data() + input.size(), predEqualTen); + TEST_EQUAL(tr, res_std, res_simd); + } + + {//aligned input/ouput + auto input(DataGeneratorAligned>(size, m_generator)); + auto res_std = std::none_of(input.cbegin(), input.cend(), predEqualTen); + auto res_simd = simdpp::none_of(input.data(), input.data() + input.size(), predEqualTen); + TEST_EQUAL(tr, res_std, res_simd); + } + {//non aligned input/ouput + auto input(DataGenerator>(size, m_generator)); + auto res_std = std::none_of(input.cbegin(), input.cend(), predEqualTen); + auto res_simd = simdpp::none_of(input.data(), input.data() + input.size(), predEqualTen); + TEST_EQUAL(tr, res_std, res_simd); + } + } + } + std::vector m_sizes; + GeneratorConstant m_generator; +}; + +template + void test_none_of_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + + {//test with predicate + const auto predEqualTen = UnaryPredicateEqualValue((T)10); + { //test prologue + vector_t ivect = { (T)1,(T)2}; + auto res = none_of(ivect.data(), ivect.data() + ivect.size(), predEqualTen); + auto resstd = std::none_of(begin(ivect), end(ivect), predEqualTen); + TEST_EQUAL(tr, res, resstd); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(50, (T)5); + ivect[49]=(T)10; + auto res = 
none_of(ivect.data(), ivect.data() + ivect.size(), predEqualTen); + auto resstd = std::none_of(begin(ivect), end(ivect), predEqualTen); + TEST_EQUAL(tr, res, resstd); + } + } + NoneOffFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/ouput + fuzzing(tr); +} + +void test_none_of(TestResults& res, TestReporter& tr) +{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("none_of"); + //test_none_of_type(ts, tr); //FIXME + //test_none_of_type(ts, tr); //FIXME + test_none_of_type(ts, tr); + test_none_of_type(ts, tr); + test_none_of_type(ts, tr); + test_none_of_type(ts, tr); + test_none_of_type(ts, tr); + test_none_of_type(ts, tr); + test_none_of_type(ts, tr); + test_none_of_type(ts, tr); +} + +} // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/reduce.cc b/test/insn/reduce.cc new file mode 100644 index 00000000..32bfe24a --- /dev/null +++ b/test/insn/reduce.cc @@ -0,0 +1,188 @@ +/* Copyright (C) 2018 Povilas Kanapickas +Copyright (C) 2018 Thomas Retornaz + +Distributed under the Boost Software License, Version 1.0. +(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include +#include + +namespace SIMDPP_ARCH_NAMESPACE { + +template< typename T> +struct BinaryOpPlus +{ +public: + BinaryOpPlus() {} + SIMDPP_INL T operator()(T const &a0, T const &a1) const SIMDPP_NOEXCEPT + { + return a0 + a1; + } + + template + SIMDPP_INL U operator()(U const &a0, U const &a1) const SIMDPP_NOEXCEPT + { + return a0 + a1; + } +}; + +//from https://stackoverflow.com/questions/17333/what-is-the-most-effective-way-for-float-and-double-comparison +template +bool approximatelyEqual(T a, T b, T epsilon) +{ + return fabs(a - b) <= ((fabs(a) < fabs(b) ? 
fabs(b) : fabs(a)) * epsilon); +} + +template +struct ReduceFuzzingTest +{ + ReduceFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator() {} + void operator()(TestReporter& tr) + { + T init = (T)0; + for (auto size : m_sizes) + { + {//aligned input/ouput + const auto input(DataGeneratorAligned>(size, m_generator)); + auto res_std = std::accumulate(input.cbegin(), input.cend(), init); + //auto res_std=std::reduce(cbegin(input), cend(input),init); + auto res_simd = simdpp::reduce(input.data(), input.data() + input.size(), init); + TEST_EQUAL(tr, approximatelyEqual(res_std, res_simd, 10 * std::numeric_limits::epsilon()), true); + } + {//non aligned input/ouput + const auto input(DataGenerator>(size, m_generator)); + auto res_std = std::accumulate(input.cbegin(), input.cend(), init); + //auto res_std = std::reduce(cbegin(input), cend(input), init); + auto res_simd = simdpp::reduce(input.data(), input.data() + input.size(), init); + TEST_EQUAL(tr, approximatelyEqual(res_std, res_simd, 10 * std::numeric_limits::epsilon()), true); + } + } + } + std::vector m_sizes; + GeneratorRandom m_generator; +}; + +template +struct ReduceBinaryFuzzingTest +{ + ReduceBinaryFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator() {} + void operator()(TestReporter& tr) + { + T init = (T)0; + auto opPlus = BinaryOpPlus(); + T neutral = (T)0; + for (auto size : m_sizes) + { + {//aligned input/ouput + const auto input(DataGeneratorAligned>(size, m_generator)); + //auto res_std = std::reduce(cbegin(input), cend(input),init, opPlus); + auto res_std = std::accumulate(input.cbegin(), input.cend(), init, opPlus); + auto res_simd = simdpp::reduce(input.data(), input.data() + input.size(), init, neutral, opPlus); + TEST_EQUAL(tr, approximatelyEqual(res_std, res_simd, 10 * std::numeric_limits::epsilon()), true); + } + {//non aligned input/ouput + const auto input(DataGenerator>(size, m_generator)); + //auto res_std = std::reduce(cbegin(input), cend(input), 
init, opPlus); + auto res_std = std::accumulate(input.cbegin(), input.cend(), init, opPlus); + auto res_simd = simdpp::reduce(input.data(), input.data() + input.size(), init, neutral, opPlus); + TEST_EQUAL(tr, approximatelyEqual(res_std, res_simd, 10 * std::numeric_limits::epsilon()), true); + } + } + } + std::vector m_sizes; + GeneratorRandom m_generator; +}; + +template +void test_reduce_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + { //test prologue + vector_t ivect = { 0,41 }; + T expected = { 42 }; + T init = { 1 }; + T res = reduce(ivect.data(), ivect.data() + ivect.size(), init); + TEST_EQUAL(tr, expected, res); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(42, 1); + T expected = { 42 }; + T init = { 0 }; + T res = reduce(ivect.data(), ivect.data() + ivect.size(), init); + TEST_EQUAL(tr, expected, res); + } + { //test main loop and epilogue on range + vector_aligned_t ivect(150, 1); + T expected = { 42 }; + T init = { 0 }; + T res = reduce(ivect.data() + 8u, ivect.data() + ivect.size() - 100u, init); + TEST_EQUAL(tr, expected, res); + } + ReduceFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/ouput + fuzzing(tr); +} + +template +void test_reducebinop_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + auto opPlus = BinaryOpPlus(); + + { //test prologue + vector_t ivect = { 1,42 }; + T expected = { 43 }; + T init = { 0 }; + T neutral = { 0 }; + T res = reduce(ivect.data(), ivect.data() + ivect.size(), init, neutral, opPlus); + TEST_EQUAL(tr, expected, res); + } + { //test main loop and epilogue on unaligned vector + vector_t ivect(125, 1); + T expected = { 125 }; + T init = { 0 }; + T neutral = { 0 }; + T res = reduce(ivect.data(), ivect.data() + ivect.size(), init, neutral, opPlus); 
+ TEST_EQUAL(tr, expected, res); + } + ReduceBinaryFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/ouput + fuzzing(tr); +} + +void test_reduce(TestResults& res, TestReporter& tr) +{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("reduce"); + test_reduce_type(ts, tr); + test_reduce_type(ts, tr); + /* test_reduce_type(ts, tr); + test_reduce_type(ts, tr); + test_reduce_type(ts, tr); + test_reduce_type(ts, tr); + test_reduce_type(ts, tr); + test_reduce_type(ts, tr); + test_reduce_type(ts, tr); + test_reduce_type(ts, tr);*/ + + test_reducebinop_type(ts, tr); + test_reducebinop_type(ts, tr); + /*test_reducebinop_type(ts, tr); + test_reducebinop_type(ts, tr); + test_reducebinop_type(ts, tr); + test_reducebinop_type(ts, tr); + test_reducebinop_type(ts, tr); + test_reducebinop_type(ts, tr); + test_reducebinop_type(ts, tr); + test_reducebinop_type(ts, tr);*/ + +} +} diff --git a/test/insn/replace.cc b/test/insn/replace.cc new file mode 100644 index 00000000..0f75e969 --- /dev/null +++ b/test/insn/replace.cc @@ -0,0 +1,89 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include +#include +#include +#include + +namespace SIMDPP_ARCH_NAMESPACE { + +template +struct ReplaceFuzzingTest +{ + ReplaceFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generatorFive(5),m_generatorTen(10) {} + void operator()(TestReporter& tr) + { + for (auto size : m_sizes) + { + {//aligned input/ouput + auto input(DataGeneratorAligned>(size, m_generatorFive)); + auto expected(DataGeneratorAligned>(size, m_generatorTen)); + simdpp::replace(input.data(),input.data()+input.size(),(T)5,(T)10); + TEST_EQUAL_COLLECTIONS(tr, input, expected); + } + {//non aligned input/ouput + auto input(DataGenerator>(size, m_generatorFive)); + auto expected(DataGenerator>(size, m_generatorTen)); + simdpp::replace(input.data(), input.data() + input.size(),(T)5, (T)10); + TEST_EQUAL_COLLECTIONS(tr, input, expected); + } + } + } + std::vector m_sizes; + GeneratorConstant m_generatorFive; + GeneratorConstant m_generatorTen; +}; + +template + void test_replace_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + { //test prologue + vector_t ivect = { (T)42,(T)42 }; + vector_t expected = { (T)0,(T)0 }; + + replace(ivect.data(),ivect.data()+ivect.size(),(T)42,(T)0 ); + TEST_EQUAL_COLLECTIONS(tr, ivect, expected); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(50); + std::iota(begin(ivect),end(ivect),(T)0); + vector_aligned_t expected(50); + std::copy(begin(ivect),end(ivect),begin(expected)); + expected[39]=42; + replace(ivect.data(),ivect.data()+ivect.size(),(T)39,(T)42 ); + TEST_EQUAL_COLLECTIONS(tr, ivect, expected); + } + ReplaceFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/ouput + fuzzing(tr); +} + +void 
test_replace(TestResults& res, TestReporter& tr) +{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("replace"); + test_replace_type(ts, tr); + test_replace_type(ts, tr); + test_replace_type(ts, tr); + test_replace_type(ts, tr); + test_replace_type(ts, tr); + test_replace_type(ts, tr); + test_replace_type(ts, tr); + test_replace_type(ts, tr); + test_replace_type(ts, tr); + test_replace_type(ts, tr); +} + +} // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/replace_if.cc b/test/insn/replace_if.cc new file mode 100644 index 00000000..d56a2a50 --- /dev/null +++ b/test/insn/replace_if.cc @@ -0,0 +1,108 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include +#include +#include + +namespace SIMDPP_ARCH_NAMESPACE { + +template + struct UnaryPredicateEqualValue +{ +public: + UnaryPredicateEqualValue(T val) :m_val(val), m_val_simd(simdpp::splat(val)) {} + using simd_mask_T = typename simdpp::simd_traits::simd_mask_type; + using simd_type_T = typename simdpp::simd_traits::simd_type; + + SIMDPP_INL bool operator()(T a) const SIMDPP_NOEXCEPT { return a == m_val; } + SIMDPP_INL simd_mask_T operator()(const simd_type_T& a) const SIMDPP_NOEXCEPT { return cmp_eq(a, m_val_simd); } + + T m_val; + simd_type_T m_val_simd; +}; + +template +struct ReplaceIfFuzzingTest +{ + ReplaceIfFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generatorFive(5), m_generatorTen(10) {} + void operator()(TestReporter& tr) + { + const auto pred = UnaryPredicateEqualValue((T)5); + for (auto size : m_sizes) + { + {//aligned input/ouput + auto input(DataGeneratorAligned>(size, m_generatorFive)); + auto expected(DataGeneratorAligned>(size, m_generatorTen)); + 
simdpp::replace_if(input.data(), input.data() + input.size(), pred, (T)10); + TEST_EQUAL_COLLECTIONS(tr, input, expected); + } + {//non aligned input/ouput + auto input(DataGenerator>(size, m_generatorFive)); + auto expected(DataGenerator>(size, m_generatorTen)); + simdpp::replace_if(input.data(), input.data() + input.size(), pred, (T)10); + TEST_EQUAL_COLLECTIONS(tr, input, expected); + } + } + } + std::vector m_sizes; + GeneratorConstant m_generatorFive; + GeneratorConstant m_generatorTen; +}; + + +template + void test_replace_if_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + + { //test prologue + vector_t ivect = { (T)42,(T)42 }; + vector_t expected = { (T)0,(T)0 }; + const auto pred = UnaryPredicateEqualValue((T)42); + replace_if(ivect.data(),ivect.data()+ivect.size(),pred,(T)0 ); + TEST_EQUAL_COLLECTIONS(tr, ivect, expected); + + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(50); + std::iota(begin(ivect),end(ivect),(T)0); + vector_aligned_t expected(50); + std::copy(begin(ivect),end(ivect),begin(expected)); + expected[39]=42; + const auto pred = UnaryPredicateEqualValue((T)39); + replace_if(ivect.data(),ivect.data()+ivect.size(),pred,(T)42 ); + TEST_EQUAL_COLLECTIONS(tr, ivect, expected); + } + ReplaceIfFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/ouput + fuzzing(tr); +} + +void test_replace_if(TestResults& res, TestReporter& tr) +{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("replace_if"); + test_replace_if_type(ts, tr); + test_replace_if_type(ts, tr); + test_replace_if_type(ts, tr); + test_replace_if_type(ts, tr); + test_replace_if_type(ts, tr); + test_replace_if_type(ts, tr); + test_replace_if_type(ts, tr); + test_replace_if_type(ts, tr); + test_replace_if_type(ts, tr); + test_replace_if_type(ts, tr); +} + +} // namespace SIMDPP_ARCH_NAMESPACE diff --git 
a/test/insn/tests.cc b/test/insn/tests.cc index 1dfe6978..04879e8a 100644 --- a/test/insn/tests.cc +++ b/test/insn/tests.cc @@ -46,32 +46,57 @@ static_assert(sizeof(simdpp::float64<8>) == 64, "Incorrect vector size"); namespace SIMDPP_ARCH_NAMESPACE { -void main_test_function(TestResults& res, TestReporter& tr, const TestOptions& opts) -{ - test_test_utils(res); - - // Tests are ordered in such a way so that base functionality that other - // tests depend on is tested first. - test_construct(res); - test_memory_load(res, tr); - test_memory_store(res, tr); - - test_blend(res); - test_bitwise(res, tr); - test_permute_generic(res); - test_shuffle_generic(res); - test_shuffle(res); - test_shuffle_bytes(res, tr); - - test_convert(res); - test_math_fp(res, opts); - test_math_int(res); - test_compare(res); - test_math_shift(res); - test_transpose(res); - - test_for_each(res, tr); -} + void main_test_function(TestResults& res, TestReporter& tr, const TestOptions& opts) + { + test_test_utils(res); + + // Tests are ordered in such a way so that base functionality that other + // tests depend on is tested first. 
+ test_construct(res); + test_memory_load(res, tr); + test_memory_store(res, tr); + + test_blend(res); + test_bitwise(res, tr); + test_permute_generic(res); + test_shuffle_generic(res); + test_shuffle(res); + test_shuffle_bytes(res, tr); + + test_convert(res); + test_math_fp(res, opts); + test_math_int(res); + test_compare(res); + test_math_shift(res); + test_transpose(res); + + test_for_each(res, tr); + + //algorithm + test_all_of(res,tr); + test_any_of(res,tr); + test_copy(res,tr); + test_copy_n(res,tr); + test_count(res,tr); + test_count_if(res,tr); + test_equal(res,tr); + test_fill(res,tr); + test_find(res, tr); + test_find_if(res, tr); + test_find_if_not(res, tr); + test_lexicographical_compare(res,tr); + test_max(res,tr); + test_max_element(res, tr); + test_min(res,tr); + test_min_element(res, tr); + test_none_of(res,tr); + test_replace(res,tr); + test_replace_if(res,tr); + test_reduce(res, tr); + test_transform(res, tr); + test_transform_reduce(res, tr); + + } } // namespace SIMDPP_ARCH_NAMESPACE /* TODO: here we use dispatcher only to register the available functions, not @@ -93,7 +118,7 @@ std::vector get_test_archs() using FunPtr = void(*)(TestResults&, TestReporter&, const TestOptions&); SIMDPP_DISPATCH_COLLECT_FUNCTIONS(versions, main_test_function, FunPtr) std::vector result; - result.assign(versions, versions+SIMDPP_DISPATCH_MAX_ARCHS); + result.assign(versions, versions + SIMDPP_DISPATCH_MAX_ARCHS); return result; } #endif diff --git a/test/insn/tests.h b/test/insn/tests.h index 5fa1ce0a..69f8b04e 100644 --- a/test/insn/tests.h +++ b/test/insn/tests.h @@ -17,25 +17,47 @@ namespace SIMDPP_ARCH_NAMESPACE { void main_test_function(TestResults& res, TestReporter& tr, const TestOptions& opts); -void test_bitwise(TestResults& res, TestReporter& tr); -void test_blend(TestResults& res); -void test_compare(TestResults& res); -void test_convert(TestResults& res); -void test_construct(TestResults& res); -void test_for_each(TestResults& res, TestReporter& tr); 
-void test_math_fp(TestResults& res, const TestOptions& opts); -void test_math_int(TestResults& res); -void test_math_shift(TestResults& res); -void test_memory_load(TestResults& res, TestReporter& tr); -void test_memory_store(TestResults& res, TestReporter& tr); -void test_set(TestResults& res); -void test_shuffle(TestResults& res); -void test_shuffle_bytes(TestResults& res, TestReporter& tr); -void test_shuffle_generic(TestResults& res); -void test_permute_generic(TestResults& res); -void test_shuffle_transpose(TestResults& res); -void test_test_utils(TestResults& res); -void test_transpose(TestResults& res); + void test_all_of(TestResults& res, TestReporter& tr); + void test_any_of(TestResults& res, TestReporter& tr); + void test_bitwise(TestResults& res, TestReporter& tr); + void test_blend(TestResults& res); + void test_compare(TestResults& res); + void test_convert(TestResults& res); + void test_construct(TestResults& res); + void test_copy(TestResults& res, TestReporter& tr); + void test_copy_n(TestResults& res, TestReporter& tr); + void test_count(TestResults& res, TestReporter& tr); + void test_count_if(TestResults& res, TestReporter& tr); + void test_equal(TestResults& res, TestReporter& tr); + void test_for_each(TestResults& res, TestReporter& tr); + void test_fill(TestResults& res, TestReporter& tr); + void test_find(TestResults& res, TestReporter& tr); + void test_find_if(TestResults& res, TestReporter& tr); + void test_find_if_not(TestResults& res, TestReporter& tr); + void test_lexicographical_compare(TestResults& res, TestReporter& tr); + void test_math_fp(TestResults& res, const TestOptions& opts); + void test_math_int(TestResults& res); + void test_math_shift(TestResults& res); + void test_max(TestResults& res, TestReporter& tr); + void test_max_element(TestResults& res, TestReporter& tr); + void test_memory_load(TestResults& res, TestReporter& tr); + void test_memory_store(TestResults& res, TestReporter& tr); + void test_min(TestResults& res, 
TestReporter& tr); + void test_min_element(TestResults& res, TestReporter& tr); + void test_none_of(TestResults& res, TestReporter& tr); + void test_replace(TestResults& res, TestReporter& tr); + void test_replace_if(TestResults& res, TestReporter& tr); + void test_set(TestResults& res); + void test_shuffle(TestResults& res); + void test_shuffle_bytes(TestResults& res, TestReporter& tr); + void test_shuffle_generic(TestResults& res); + void test_permute_generic(TestResults& res); + void test_reduce(TestResults& res, TestReporter& tr); + void test_shuffle_transpose(TestResults& res); + void test_test_utils(TestResults& res); + void test_transpose(TestResults& res); + void test_transform(TestResults& res, TestReporter& tr); + void test_transform_reduce(TestResults& res, TestReporter& tr); } // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/transform.cc b/test/insn/transform.cc new file mode 100644 index 00000000..a0685001 --- /dev/null +++ b/test/insn/transform.cc @@ -0,0 +1,223 @@ +/* Copyright (C) 2018 Povilas Kanapickas +Copyright (C) 2018 Thomas Retornaz + +Distributed under the Boost Software License, Version 1.0. 
+(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include + +namespace SIMDPP_ARCH_NAMESPACE { + + +template< typename T> +struct UnaryOpAddValue +{ + T m_val; +public: + UnaryOpAddValue(T val) :m_val(val) {} + SIMDPP_INL T operator()(T const &a) const SIMDPP_NOEXCEPT + { + return m_val + a; + } + + template + SIMDPP_INL U operator()(U const &a) const SIMDPP_NOEXCEPT + { + return m_val + a; + } +}; + +template< typename T> +struct BinaryOpAdd +{ +public: + BinaryOpAdd() {} + SIMDPP_INL T operator()(T const &a0, T const &a1) const SIMDPP_NOEXCEPT + { + return a0 + a1; + } + + template + SIMDPP_INL U operator()(U const &a0, U const &a1) const SIMDPP_NOEXCEPT + { + using namespace simdpp; + return a0 + a1; + } +}; + + +template +struct TransformUnaryFuzzingTest +{ + TransformUnaryFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator() {} + void operator()(TestReporter& tr) + { + auto opPlusOne = UnaryOpAddValue(1); + for (auto size : m_sizes) + { + {//aligned input/ouput + const auto input(DataGeneratorAligned>(size, m_generator)); + std::vector::alignment>> expected(size); + std::vector::alignment>> output(size); + std::transform(input.cbegin(), input.cend(),expected.begin(), opPlusOne); + simdpp::transform(input.data(), input.data() + input.size(), output.data(), opPlusOne); + TEST_EQUAL_COLLECTIONS(tr, output, expected); + } + {//non aligned input/ouput + const auto input(DataGenerator>(size, m_generator)); + std::vector expected(size); + std::vector output(size); + std::transform(input.cbegin(), input.cend(),expected.begin(), opPlusOne); + simdpp::transform(input.data(), input.data() + input.size(), output.data(), opPlusOne); + TEST_EQUAL_COLLECTIONS(tr, output, expected); + } + } + } + std::vector m_sizes; + GeneratorRandom m_generator; +}; + +template +struct TransformBinaryFuzzingTest +{ + 
TransformBinaryFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator() {} + void operator()(TestReporter& tr) + { + auto opPlus = BinaryOpAdd(); + for (auto size : m_sizes) + { + {//aligned input/ouput + const auto input1(DataGeneratorAligned>(size, m_generator)); + const auto input2(DataGeneratorAligned>(size, m_generator)); + std::vector::alignment>> expected(size); + std::vector::alignment>> output(size); + std::transform(input1.cbegin(), input1.cend(), input2.cbegin(), expected.begin(), opPlus); + simdpp::transform(input1.data(), input1.data() + input1.size(), input2.data(), output.data(), opPlus); + TEST_EQUAL_COLLECTIONS(tr, output, expected); + } + {//non aligned input/ouput + const auto input1(DataGenerator>(size, m_generator)); + const auto input2(DataGenerator>(size, m_generator)); + std::vector expected(size); + std::vector output(size); + std::transform(input1.cbegin(), input1.cend(), input2.cbegin(), expected.begin(), opPlus); + simdpp::transform(input1.data(), input1.data() + input1.size(), input2.data(), output.data(), opPlus); + TEST_EQUAL_COLLECTIONS(tr, output, expected); + } + } + } + std::vector m_sizes; + GeneratorRandom m_generator; +}; +template +void test_transform_type_unary(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + auto opPlusOne = UnaryOpAddValue(1); + { //test prologue + vector_t ivect = { 0,1 }; + vector_t ovect(2); + vector_t expected = { 1,2 }; + + transform(ivect.data(), ivect.data() + ivect.size(), ovect.data(), opPlusOne); + TEST_EQUAL_COLLECTIONS(tr, ovect, expected); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(150, 0); + vector_aligned_t ovect(150); + vector_t expected(150, 1); + + transform(ivect.data(), ivect.data() + ivect.size(), ovect.data(), opPlusOne); + TEST_EQUAL_COLLECTIONS(tr, ovect, expected); + } + TransformUnaryFuzzingTest 
fuzzing({1,3,5,8,21,55,89,144});//0 generate null ptr inputs/ouput + fuzzing(tr); + + +} +template +void test_transform_type_binary(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + using vector_u = std::vector; + using vector_aligned_t = std::vector::alignment>>; + using vector_aligned_u = std::vector::alignment>>; + auto opPlus = BinaryOpAdd(); + { //test prologue + vector_t ivect1 = { 0,1 }; + vector_u ivect2 = { 1,2 }; + vector_t ovect(2); + vector_t expected = { 1,3 }; + + transform(ivect1.data(), ivect1.data() + ivect1.size(), ivect2.data(), ovect.data(), opPlus); + for (auto i = 0u; i < ovect.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS + { + TEST_EQUAL(tr, expected[i], ovect[i]); + } + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect1(150, 0); + vector_aligned_t ivect2(150, 1); + vector_aligned_t ovect(150); + vector_t expected(150, 1); + + transform(ivect1.data(), ivect1.data() + ivect1.size(), ivect2.data(), ovect.data(), opPlus); + for (auto i = 0u; i < ovect.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS + { + TEST_EQUAL(tr, expected[i], ovect[i]); + } + } + { //test main loop and epilogue on range + vector_aligned_t ivect1(150, 0); + vector_aligned_t ivect2(150, 1); + vector_aligned_t ovect(150); + vector_t expected(150, 1); + + transform(ivect1.data() + 10u, ivect1.data() + ivect1.size() - 10u, ivect2.data() + 10u, ovect.data() + 10u, opPlus); + for (auto i = 10u; i < ovect.size() - 10u; ++i) //TODO make TEST_EQUAL_COLECTIONS + { + TEST_EQUAL(tr, expected[i], ovect[i]); + } + } + TransformBinaryFuzzingTest fuzzing({1,3,5,8,21,55,89,144 }); //0 generate null ptr inputs/ouput + fuzzing(tr); +} + +void test_transform(TestResults& res, TestReporter& tr) +{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("transform"); + test_transform_type_unary(ts, tr); + test_transform_type_unary(ts, tr); + test_transform_type_unary(ts, tr); + 
test_transform_type_unary(ts, tr); + test_transform_type_unary(ts, tr); + test_transform_type_unary(ts, tr); + test_transform_type_unary(ts, tr); + test_transform_type_unary(ts, tr); + test_transform_type_unary(ts, tr); + test_transform_type_unary(ts, tr); + + test_transform_type_binary(ts, tr); + test_transform_type_binary(ts, tr); + test_transform_type_binary(ts, tr); + test_transform_type_binary(ts, tr); + test_transform_type_binary(ts, tr); + test_transform_type_binary(ts, tr); + test_transform_type_binary(ts, tr); + test_transform_type_binary(ts, tr); + test_transform_type_binary(ts, tr); + test_transform_type_binary(ts, tr); + +} + +} // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/transform_reduce.cc b/test/insn/transform_reduce.cc new file mode 100644 index 00000000..31a7a15b --- /dev/null +++ b/test/insn/transform_reduce.cc @@ -0,0 +1,178 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include +#include + +namespace SIMDPP_ARCH_NAMESPACE { +using namespace simdpp; + +template + struct UnaryPredicateSquare +{ + using simd_type_T = typename simd_traits::simd_type; + SIMDPP_INL T operator()(T a) const SIMDPP_NOEXCEPT {return a*a;} + SIMDPP_INL simd_type_T operator()(const simd_type_T& a) const SIMDPP_NOEXCEPT {return a*a;} +}; + +template + struct BinaryPredicatePlus +{ + using simd_type_T = typename simd_traits::simd_type; + SIMDPP_INL T operator()(T a0,T a1) const SIMDPP_NOEXCEPT {return a0 + a1;} + SIMDPP_INL simd_type_T operator()(const simd_type_T& a0,const simd_type_T& a1) const SIMDPP_NOEXCEPT {return a0+a1;} +}; + +template + struct BinaryPredicateMul +{ + using simd_type_T = typename simd_traits::simd_type; + SIMDPP_INL T operator()(T a0,T a1) const SIMDPP_NOEXCEPT {return a0 * a1;} + SIMDPP_INL simd_type_T operator()(const simd_type_T& a0,const simd_type_T& a1) const SIMDPP_NOEXCEPT {return a0*a1;} +}; + +//from https://stackoverflow.com/questions/17333/what-is-the-most-effective-way-for-float-and-double-comparison +template +bool approximatelyEqual(T a, T b, T epsilon) +{ + return fabs(a - b) <= ((fabs(a) < fabs(b) ? 
fabs(b) : fabs(a)) * epsilon); +} + +template +struct TransformReduceUnaryFuzzingTest +{ + TransformReduceUnaryFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator() {} + void operator()(TestReporter& tr) + { + T init = (T)0; + + for (auto size : m_sizes) + { + {//aligned input/ouput + const auto input(DataGeneratorAligned>(size, m_generator)); + auto res_std = std::inner_product(input.data(), input.data() + input.size(), input.data(),init); + auto res_simd = simdpp::transform_reduce(input.data(), input.data() + input.size(),init, BinaryPredicatePlus(), UnaryPredicateSquare()); + TEST_EQUAL(tr, approximatelyEqual(res_std,res_simd, 10 * std::numeric_limits::epsilon()),true); + } + {//non aligned input/ouput + const auto input(DataGenerator>(size, m_generator)); + auto res_std = std::inner_product(input.data(), input.data() + input.size(), input.data(), init); + auto res_simd = simdpp::transform_reduce(input.data(), input.data() + input.size(), init, BinaryPredicatePlus(), UnaryPredicateSquare()); + TEST_EQUAL(tr, approximatelyEqual(res_std,res_simd, 10 * std::numeric_limits::epsilon()), true); + } + } + } + std::vector m_sizes; + GeneratorRandom m_generator; +}; + +template +struct TransformReduceBinaryFuzzingTest +{ + TransformReduceBinaryFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator() {} + void operator()(TestReporter& tr) + { + T init = (T)1; + + for (auto size : m_sizes) + { + {//aligned input/ouput + const auto input(DataGeneratorAligned>(size, m_generator)); + const auto input2(DataGeneratorAligned>(size, m_generator)); + auto res_std = std::inner_product(input.data(), input.data() + input.size(), input2.data(), init); + auto res_simd = simdpp::transform_reduce(input.data(), input.data() + input.size(), input2.data(), init, BinaryPredicatePlus(), BinaryPredicateMul()); + TEST_EQUAL(tr, approximatelyEqual(res_std, res_simd, 10 * std::numeric_limits::epsilon()), true); + } + {//non aligned input/ouput + const 
auto input(DataGenerator>(size, m_generator)); + const auto input2(DataGenerator>(size, m_generator)); + auto res_std = std::inner_product(input.data(), input.data() + input.size(), input2.data(), init); + auto res_simd = simdpp::transform_reduce(input.data(), input.data() + input.size(), input2.data(), init, BinaryPredicatePlus(), BinaryPredicateMul()); + TEST_EQUAL(tr, approximatelyEqual(res_std, res_simd, 10 * std::numeric_limits::epsilon()), true); + } + } + } + std::vector m_sizes; + GeneratorRandom m_generator; +}; + +template + void test_transform_reduce_type_unary(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + {// + vector_aligned_t ivect={(T)0,T(1)}; + auto res=transform_reduce(ivect.data(),ivect.data()+ivect.size(),(T)0,BinaryPredicatePlus(),UnaryPredicateSquare()); + auto expected=std::inner_product( ivect.data(),ivect.data()+ivect.size(), ivect.data(), T(0)); + TEST_EQUAL(tr, expected,res); + } + {// + vector_aligned_t ivect(50); + std::iota(begin(ivect),end(ivect),(T)1); + auto res=transform_reduce(ivect.data(),ivect.data()+ivect.size(),(T)0,BinaryPredicatePlus(),UnaryPredicateSquare()); + auto expected=std::inner_product( ivect.data(),ivect.data()+ivect.size(), ivect.data(), T(0)); + TEST_EQUAL(tr, expected,res); + } + TransformReduceUnaryFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/ouput + fuzzing(tr); +} +template + void test_transform_reduce_type_binary(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + using vector_aligned_u = std::vector::alignment>>; + { + vector_aligned_t ivect(50); + std::iota(begin(ivect),end(ivect),(T)1); + vector_aligned_u ivect2(50); + std::iota(begin(ivect2),end(ivect2),(T)1); + auto res = transform_reduce( ivect.data(),ivect.data()+ivect.size(),ivect2.data(),T(1), 
BinaryPredicatePlus(), BinaryPredicateMul()); + auto expected = std::inner_product( ivect.data(), ivect.data()+ivect.size(), ivect2.data(), T(1)); + TEST_EQUAL(tr, expected,res); + } + TransformReduceBinaryFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/ouput + fuzzing(tr); +} + +void test_transform_reduce(TestResults& res, TestReporter& tr) +{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("transform_reduce"); + test_transform_reduce_type_unary(ts, tr); + test_transform_reduce_type_unary(ts, tr); + // test_transform_reduce_type_unary(ts, tr); //FIXME + // test_transform_reduce_type_unary(ts, tr); //FIXME + // test_transform_reduce_type_unary(ts, tr); //FIXME + // test_transform_reduce_type_unary(ts, tr); //FIXME + // test_transform_reduce_type_unary(ts, tr); //FIXME + // test_transform_reduce_type_unary(ts, tr); //FIXME + // test_transform_reduce_type_unary(ts, tr); //FIXME + // test_transform_reduce_type_unary(ts, tr); //FIXME + + test_transform_reduce_type_binary(ts, tr); + test_transform_reduce_type_binary(ts, tr); + // test_transform_reduce_type_binary(ts,tr); //FIXME + // test_transform_reduce_type_binary(ts, tr); //FIXME + // test_transform_reduce_type_binary(ts, tr); //FIXME + // test_transform_reduce_type_binary(ts, tr); //FIXME + // test_transform_reduce_type_binary(ts, tr); //FIXME + // test_transform_reduce_type_binary(ts, tr); //FIXME + // test_transform_reduce_type_binary(ts, tr); //FIXME + // test_transform_reduce_type_binary(ts, tr); //FIXME + +} + +} // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/utils/test_helpers.h b/test/utils/test_helpers.h index e6617964..bd6ce269 100644 --- a/test/utils/test_helpers.h +++ b/test/utils/test_helpers.h @@ -1,5 +1,5 @@ /* Copyright (C) 2012 Povilas Kanapickas - + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. 
(See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -16,6 +16,9 @@ #include "test_reporter.h" #include #include +#include +#include +#include inline void set_round_to_zero() @@ -90,6 +93,7 @@ class TestData { TestData& operator=(const TestData& other) { data_ = other.data_; + return *this; } template @@ -115,7 +119,6 @@ class TestData { std::vector> data_; }; - /* A bunch of overloads that wrap the TestSuite::push() method. The push() method accepts a type enum plus a pointer; the wrapper overloads determine the type enum from the type of the supplied argument. @@ -282,6 +285,90 @@ void print_vector_numeric(std::ostream& out, const V& v) print_vector_numeric(out, GetElementType::value, v.length, block.data()); } + +//TR to be moved elsewhere ? + +template +struct GeneratorConstant +{ + GeneratorConstant(T constant) { m_constant = constant; } + T operator()() { return m_constant; } + T m_constant; +}; + +template +struct GeneratorIota +{ + GeneratorIota(T start) { m_current = start; } + T operator()() { return ++m_current; } + T m_current; +}; + +template +struct GeneratorRandom; + +template <> +struct GeneratorRandom +{ + GeneratorRandom() :m_inner_random_generator(std::random_device()()), m_dis(0, UINT8_MAX) {} + uint8_t operator()() { return m_dis(m_inner_random_generator); } + + std::mt19937 m_inner_random_generator; + std::uniform_int_distribution m_dis; +}; + +template<> +struct GeneratorRandom +{ + GeneratorRandom() :m_inner_random_generator(std::random_device()()), m_dis(INT8_MIN, INT8_MAX) {} + int8_t operator()() { return m_dis(m_inner_random_generator); } + + std::mt19937 m_inner_random_generator; + std::uniform_int_distribution m_dis; +}; + +template +struct GeneratorRandom::value>::type> +{ + GeneratorRandom() :m_inner_random_generator(std::random_device()()), m_dis() {} + T operator()() + { + return m_dis(m_inner_random_generator); + } + std::mt19937 m_inner_random_generator; + std::uniform_int_distribution m_dis; 
+}; + +template +struct GeneratorRandom::value>::type> +{ + GeneratorRandom() :m_inner_random_generator(std::random_device()()), m_dis() {} + T operator()() + { + return m_dis(m_inner_random_generator); + } + std::mt19937 m_inner_random_generator; + std::uniform_real_distribution m_dis; +}; + +template +//decltype(auto) DataGeneratorAligned(std::size_t size, Generator gen) +std::vector::alignment>> DataGeneratorAligned(std::size_t size, Generator gen) +{ + std::vector::alignment>> vect(size); + std::generate(vect.begin(), vect.end(), gen); + return vect; +} + +template +//decltype(auto) DataGenerator(std::size_t size, Generator gen) +std::vector DataGenerator(std::size_t size, Generator gen) +{ + std::vector vect(size); + std::generate(vect.begin(), vect.end(), gen); + return vect; +} + } // namespace SIMDPP_ARCH_NAMESPACE // we are supposed to call this from within the test function which is in @@ -687,6 +774,43 @@ void test_cmp_equal(TestReporter& tr, const T1& a1, const T2& a2, line, file); } + +template +void test_cmp_equal_collections_impl(TestReporter& tr, + const Container1& a1, const Container2& a2, + bool expected_equal, unsigned line, const char* file) +{ + bool sucess_size= (a1.size()== a2.size()); + if (!sucess_size) { + tr.add_result(false); + print_separator(tr.out()); + print_file_info(tr.out(), file, line); + tr.out() << " Container Size not equal:\n"; + tr.out() << " Container1 Size is:"< +void test_cmp_equal_collections(TestReporter& tr, const Container1& a1, const Container2& a2, + bool expected_equal, unsigned line, const char* file) +{ + static_assert(std::is_same::value, //TR to be relaxed for comparable types? 
+ "Invalid types for comparison"); + test_cmp_equal_collections_impl(tr, a1, a2, expected_equal,line, file); +} + #define TEST_EQUAL(TR, V1, V2) \ do { test_cmp_equal(TR, V1, V2, true, __LINE__, __FILE__); } while(0) @@ -699,4 +823,8 @@ void test_cmp_equal(TestReporter& tr, const T1& a1, const T2& a2, #define TEST_NOT_EQUAL_MEMORY(TR, E1, E2, COUNT) \ do { test_cmp_memory((TR), (E1), (E2), (COUNT), false, __LINE__, __FILE__); } while(0) +#define TEST_EQUAL_COLLECTIONS(TR, C1, C2) \ + do { test_cmp_equal_collections(TR, C1, C2, true, __LINE__, __FILE__); } while(0) + + #endif diff --git a/test/utils/test_results_set.cc b/test/utils/test_results_set.cc index 24ef4f9f..5d513e69 100644 --- a/test/utils/test_results_set.cc +++ b/test/utils/test_results_set.cc @@ -24,15 +24,15 @@ TestResultsSet::TestResultsSet(const char* name) : reset_seq(); } -TestResultsSet::Result& TestResultsSet::push(ElementType type, unsigned length, - const char* file, unsigned line) +TestResultsSet::Result& TestResultsSet::push(ElementType type, std::size_t length, + const char* file, std::size_t line) { results_.emplace_back(type, length, element_size_for_type(type), file, line, seq_++, curr_precision_ulp_, curr_fp_zero_equal_); return results_.back(); } -unsigned precision_for_result(const TestResultsSet::Result& res) +std::size_t precision_for_result(const TestResultsSet::Result& res) { switch (res.type) { case TYPE_FLOAT32: @@ -48,13 +48,13 @@ template<> struct fix_char_type { using type = int; }; template<> struct fix_char_type { using type = int; }; template -void print_hex(std::ostream& err, unsigned num_elems, unsigned width, +void print_hex(std::ostream& err, std::size_t num_elems, std::size_t width, const T* p) { static_assert(std::is_unsigned::value, "T must be unsigned"); err << "[ " << std::hex << std::setfill('0'); err.precision(width); - for (unsigned i = 0; i < num_elems; i++, p++) { + for (std::size_t i = 0; i < num_elems; i++, p++) { err << std::setw(width*2) << uint64_t(*p); 
if (i != num_elems - 1) { err << " ; "; @@ -65,12 +65,12 @@ void print_hex(std::ostream& err, unsigned num_elems, unsigned width, } template -void print_numeric(std::ostream& err, unsigned num_elems, unsigned precision, +void print_numeric(std::ostream& err, std::size_t num_elems, std::size_t precision, const T* p) { err << "[ "; err.precision(precision); - for (unsigned i = 0; i < num_elems; i++, p++) { + for (std::size_t i = 0; i < num_elems; i++, p++) { err << typename fix_char_type::type(*p); if (i != num_elems - 1) { err << " ; "; @@ -80,7 +80,7 @@ void print_numeric(std::ostream& err, unsigned num_elems, unsigned precision, err << std::dec; } -void print_vector_hex(std::ostream& out, ElementType type, unsigned num_elems, +void print_vector_hex(std::ostream& out, ElementType type, std::size_t num_elems, const void* data) { switch (type) { @@ -118,7 +118,7 @@ void print_vector_hex(std::ostream& out, ElementType type, unsigned num_elems, } void print_vector_numeric(std::ostream& out, ElementType type, - unsigned num_elems, const void* data) + std::size_t num_elems, const void* data) { switch (type) { case TYPE_UINT8: @@ -171,7 +171,7 @@ const char* vector_type_to_str(ElementType type) } } -void print_data_diff(std::ostream& out, ElementType type, unsigned num_elems, +void print_data_diff(std::ostream& out, ElementType type, std::size_t num_elems, const void* data_a, const void* data_b) { out << "type: " << vector_type_to_str(type) @@ -202,7 +202,7 @@ void print_file_info(std::ostream& out, const char* file) out << " In file \"" << file << "\" :\n"; } -void print_file_info(std::ostream& out, const char* file, unsigned line) +void print_file_info(std::ostream& out, const char* file, std::size_t line) { if (file == nullptr) { file = ""; @@ -220,12 +220,12 @@ void print_test_case_name(std::ostream& out, const char* name) out << " In test case \"" << name << "\" :\n"; } -void print_seq_num(std::ostream& out, unsigned num) +void print_seq_num(std::ostream& out, 
std::size_t num) { out << " Sequence number: " << num << "\n"; } -void print_precision(std::ostream& out, unsigned prec) +void print_precision(std::ostream& out, std::size_t prec) { if (prec > 0) { out << " Precision: " << prec << "ULP\n"; @@ -304,10 +304,10 @@ T nextafter_ulps(T from, T to) // T is either double or float template -bool cmpeq_arrays(const T* a, const T* b, unsigned num_elems, - unsigned prec, bool zero_eq) +bool cmpeq_arrays(const T* a, const T* b, std::size_t num_elems, + std::size_t prec, bool zero_eq) { - for (unsigned i = 0; i < num_elems; i++) { + for (std::size_t i = 0; i < num_elems; i++) { // we need to be extra-precise here. nextafter is used because it won't // introduce any rounding errors T ia = *a++; @@ -318,7 +318,7 @@ bool cmpeq_arrays(const T* a, const T* b, unsigned num_elems, if (zero_eq && is_zero_or_neg_zero(ia) && is_zero_or_neg_zero(ib)) { continue; } - for (unsigned i = 0; i < prec; i++) { + for (std::size_t i = 0; i < prec; i++) { ia = nextafter_ulps(ia, ib); } if (std::memcmp(&ia, &ib, sizeof(ia)) != 0) { @@ -345,12 +345,12 @@ const char* get_filename_from_results_set(const TestResultsSet& a, } struct TestSequence { - unsigned begin_index, end_index; + std::size_t begin_index, end_index; const char* begin_file; // For comparisons we want to strip the arch suffix from the file name. // To reduce the number of duplicate computations it is cached here. std::string begin_file_stripped; - unsigned begin_line; + std::size_t begin_line; }; bool is_test_seq_from_same_test(const TestSequence& a, const TestSequence& b) @@ -370,25 +370,25 @@ using TestSequenceList = std::vector; Returns true if test results were skipped, false otherwise. 
*/ -bool skip_results_until_same_test(unsigned& ia, unsigned& ib, +bool skip_results_until_same_test(std::size_t& ia, std::size_t& ib, const TestSequenceList& a, const TestSequenceList& b) { if (is_test_seq_from_same_test(a[ia], b[ib])) return false; - unsigned max_skipped = a.size() - ia + b.size() - ib; + auto max_skipped = a.size() - ia + b.size() - ib; // This problem is solved by brute force as the number of skipped sequences // is very likely small. We evaluate all possible ways to skip sequences // starting with the smallest total number of skipped sequences. - for (unsigned num_skipped = 1; num_skipped < max_skipped; ++num_skipped) { + for (auto num_skipped = 1; num_skipped < max_skipped; ++num_skipped) { - for (unsigned i = 0; i <= num_skipped; ++i) { - unsigned skip_from_a = i; - unsigned skip_from_b = num_skipped - i; + for (auto i = 0; i <= num_skipped; ++i) { + auto skip_from_a = i; + auto skip_from_b = num_skipped - i; - unsigned new_ia = ia + skip_from_a; - unsigned new_ib = ib + skip_from_b; + auto new_ia = ia + skip_from_a; + auto new_ib = ib + skip_from_b; if (new_ia < a.size() && new_ib < b.size()) { if (is_test_seq_from_same_test(a[new_ia], b[new_ib])) { @@ -424,12 +424,12 @@ TestSequenceList build_test_sequences(const std::vector& TestSequence next_seq; - unsigned i = 0; + std::size_t i = 0; next_seq.begin_index = i; next_seq.begin_file = results[i].file; next_seq.begin_file_stripped = strip_arch_suffix_from_file(results[i].file); next_seq.begin_line = results[i].line; - unsigned last_seq_num = results[i].seq; + auto last_seq_num = results[i].seq; ++i; @@ -453,7 +453,7 @@ TestSequenceList build_test_sequences(const std::vector& } bool cmpeq_result(const TestResultsSet::Result& ia, const TestResultsSet::Result& ib, - unsigned fp_prec, bool fp_zero_eq) + std::size_t fp_prec, bool fp_zero_eq) { if (std::memcmp(ia.d(), ib.d(), ia.el_size * ia.length) == 0) { return true; @@ -491,8 +491,8 @@ void report_test_comparison(const TestResultsSet& a, const 
char* a_arch, TestSequenceList b_seqs = build_test_sequences(b.results()); // Compare results - unsigned ia_seq = 0; - unsigned ib_seq = 0; + std::size_t ia_seq = 0u; + std::size_t ib_seq = 0u; while (ia_seq < a_seqs.size() && ib_seq < b_seqs.size()) { if (skip_results_until_same_test(ia_seq, ib_seq, a_seqs, b_seqs)) { @@ -502,8 +502,8 @@ void report_test_comparison(const TestResultsSet& a, const char* a_arch, const auto& a_seq = a_seqs[ia_seq]; const auto& b_seq = b_seqs[ib_seq]; - unsigned a_seq_size = a_seq.end_index - a_seq.begin_index; - unsigned b_seq_size = b_seq.end_index - b_seq.begin_index; + std::size_t a_seq_size = a_seq.end_index - a_seq.begin_index; + std::size_t b_seq_size = b_seq.end_index - b_seq.begin_index; if (a_seq_size != b_seq_size) { print_separator(tr.out()); @@ -522,9 +522,9 @@ void report_test_comparison(const TestResultsSet& a, const char* a_arch, return; } - for (unsigned i = 0; i < a_seq_size; ++i) { - unsigned ia = a_seq.begin_index + i; - unsigned ib = b_seq.begin_index + i; + for (auto i = 0; i < a_seq_size; ++i) { + auto ia = a_seq.begin_index + i; + auto ib = b_seq.begin_index + i; const auto& a_res = a.results()[ia]; const auto& b_res = b.results()[ib]; @@ -573,7 +573,7 @@ void report_test_comparison(const TestResultsSet& a, const char* a_arch, return; } - unsigned prec = std::max(precision_for_result(a_res), + std::size_t prec = std::max(precision_for_result(a_res), precision_for_result(b_res)); bool fp_zero_eq = a_res.fp_zero_eq || b_res.fp_zero_eq; diff --git a/test/utils/test_results_set.h b/test/utils/test_results_set.h index e643398d..92b3bd1b 100644 --- a/test/utils/test_results_set.h +++ b/test/utils/test_results_set.h @@ -16,18 +16,18 @@ #include "element_type.h" // Prints two vectors side by side for comparison -void print_data_diff(std::ostream& out, ElementType type, unsigned num_elems, +void print_data_diff(std::ostream& out, ElementType type, std::size_t num_elems, const void* data_a, const void* data_b); void 
print_separator(std::ostream& out); void print_file_info(std::ostream& out, const char* file); -void print_file_info(std::ostream& out, const char* file, unsigned line); +void print_file_info(std::ostream& out, const char* file, std::size_t line); -void print_vector_hex(std::ostream& out, ElementType type, unsigned num_elems, +void print_vector_hex(std::ostream& out, ElementType type, std::size_t num_elems, const void* data); void print_vector_numeric(std::ostream& out, ElementType type, - unsigned num_elems, const void* data); + std::size_t num_elems, const void* data); /** The class represents test results for certain instruction set. We later compare the results with other instruction sets and assume that all @@ -40,11 +40,11 @@ class TestResultsSet { // Holds one result vector struct Result { - static const unsigned num_bytes = 32; + static const std::size_t num_bytes = 32; - Result(ElementType atype, unsigned alength, unsigned ael_size, - const char* afile, unsigned aline, unsigned aseq, - unsigned aprec_ulp, bool afp_zero_eq) + Result(ElementType atype, std::size_t alength, std::size_t ael_size, + const char* afile, std::size_t aline, std::size_t aseq, + std::size_t aprec_ulp, bool afp_zero_eq) { type = atype; file = afile; @@ -58,15 +58,15 @@ class TestResultsSet { } ElementType type; - unsigned line; - unsigned seq; - unsigned prec_ulp; + std::size_t line; + std::size_t seq; + std::size_t prec_ulp; bool fp_zero_eq; const char* file; - unsigned length; - unsigned el_size; + std::size_t length; + std::size_t el_size; - void set(unsigned id, void* adata) + void set(std::size_t id, void* adata) { std::memcpy(data.data() + id*el_size, adata, el_size); } @@ -80,11 +80,11 @@ class TestResultsSet { }; /// Stores the results into the results set. - Result& push(ElementType type, unsigned length, const char* file, unsigned line); + Result& push(ElementType type, std::size_t length, const char* file, std::size_t line); /// Sets the allowed error in ULPs. 
Only meaningful for floating-point data. /// Affects all pushed data until the next call to @a unset_precision - void set_precision(unsigned num_ulp) { curr_precision_ulp_ = num_ulp; } + void set_precision(std::size_t num_ulp) { curr_precision_ulp_ = num_ulp; } void unset_precision() { curr_precision_ulp_ = 0; } /// Sets whether floating-point zero and negative zero are considered @@ -106,8 +106,8 @@ class TestResultsSet { TestResultsSet(const char* name); const char* name_; - unsigned seq_; - unsigned curr_precision_ulp_; + std::size_t seq_; + std::size_t curr_precision_ulp_; bool curr_fp_zero_equal_; std::vector results_;