diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc
index 02f583e0110b7..8799b8d6eed62 100644
--- a/cpp/src/arrow/util/bit_util_test.cc
+++ b/cpp/src/arrow/util/bit_util_test.cc
@@ -64,6 +64,7 @@ using internal::BitsetStack;
 using internal::CopyBitmap;
 using internal::CountSetBits;
 using internal::InvertBitmap;
+using internal::OptionalBitmapAnd;
 using internal::ReverseBitmap;
 using util::SafeCopy;
 
@@ -1272,6 +1273,25 @@ struct BitmapOperation {
   virtual ~BitmapOperation() = default;
 };
 
+// Adapts both OptionalBitmapAnd overloads to the BitmapOperation interface.
+struct OptionalBitmapAndOp : public BitmapOperation {
+  Result<std::shared_ptr<Buffer>> Call(MemoryPool* pool, const uint8_t* left,
+                                       int64_t left_offset, const uint8_t* right,
+                                       int64_t right_offset, int64_t length,
+                                       int64_t out_offset) const override {
+    return OptionalBitmapAnd(pool, left, left_offset, right, right_offset, length,
+                             out_offset);
+  }
+
+  Status Call(const uint8_t* left, int64_t left_offset, const uint8_t* right,
+              int64_t right_offset, int64_t length, int64_t out_offset,
+              uint8_t* out_buffer) const override {
+    OptionalBitmapAnd(left, left_offset, right, right_offset, length, out_offset,
+                      out_buffer);
+    return Status::OK();
+  }
+};
+
 struct BitmapAndOp : public BitmapOperation {
   Result<std::shared_ptr<Buffer>> Call(MemoryPool* pool, const uint8_t* left,
                                        int64_t left_offset, const uint8_t* right,
@@ -1342,25 +1362,40 @@ class BitmapOp : public ::testing::Test {
                    const std::vector<int>& right_bits,
                    const std::vector<int>& result_bits) {
     std::shared_ptr<Buffer> left, right, out;
-    int64_t length;
+    int64_t length{0};
+    uint8_t *left_buffer, *right_buffer;
 
     for (int64_t left_offset : {0, 1, 3, 5, 7, 8, 13, 21, 38, 75, 120, 65536}) {
-      BitmapFromVector(left_bits, left_offset, &left, &length);
+      if (left_bits.size() > 0) {
+        BitmapFromVector(left_bits, left_offset, &left, &length);
+        left_buffer = left->mutable_data();
+      } else {
+        left_buffer = nullptr;
+      }
       for (int64_t right_offset : {left_offset, left_offset + 8, left_offset + 40}) {
-        BitmapFromVector(right_bits, right_offset, &right, &length);
+        if (right_bits.size() > 0) {
+          BitmapFromVector(right_bits, right_offset, &right, &length);
+          right_buffer = right->mutable_data();
+        } else {
+          right_buffer = nullptr;
+        }
         for (int64_t out_offset : {left_offset, left_offset + 16, left_offset + 24}) {
           ASSERT_OK_AND_ASSIGN(
-              out, op.Call(default_memory_pool(), left->mutable_data(), left_offset,
-                           right->mutable_data(), right_offset, length, out_offset));
-          auto reader = internal::BitmapReader(out->mutable_data(), out_offset, length);
-          ASSERT_READER_VALUES(reader, result_bits);
-
-          // Clear out buffer and try non-allocating version
-          std::memset(out->mutable_data(), 0, out->size());
-          ASSERT_OK(op.Call(left->mutable_data(), left_offset, right->mutable_data(),
-                            right_offset, length, out_offset, out->mutable_data()));
-          reader = internal::BitmapReader(out->mutable_data(), out_offset, length);
-          ASSERT_READER_VALUES(reader, result_bits);
+              out, op.Call(default_memory_pool(), left_buffer, left_offset, right_buffer,
+                           right_offset, length, out_offset));
+          if (out == nullptr) {
+            ASSERT_EQ(std::vector<int>{}, result_bits);
+            // TODO(raulcd) This has to test the case of non-allocating buffer
+          } else {
+            auto reader = internal::BitmapReader(out->mutable_data(), out_offset, length);
+            ASSERT_READER_VALUES(reader, result_bits);
+            // Clear out buffer and try non-allocating version
+            std::memset(out->mutable_data(), 0, out->size());
+            ASSERT_OK(op.Call(left_buffer, left_offset, right_buffer, right_offset,
+                              length, out_offset, out->mutable_data()));
+            reader = internal::BitmapReader(out->mutable_data(), out_offset, length);
+            ASSERT_READER_VALUES(reader, result_bits);
+          }
         }
       }
     }
@@ -1370,34 +1405,69 @@ class BitmapOp : public ::testing::Test {
                      const std::vector<int>& right_bits,
                      const std::vector<int>& result_bits) {
     std::shared_ptr<Buffer> left, right, out;
-    int64_t length;
+    int64_t length{0};
+    uint8_t *left_buffer, *right_buffer;
     auto offset_values = {0, 1, 3, 5, 7, 8, 13, 21, 38, 75, 120, 65536};
 
     for (int64_t left_offset : offset_values) {
-      BitmapFromVector(left_bits, left_offset, &left, &length);
+      if (left_bits.size() > 0) {
+        BitmapFromVector(left_bits, left_offset, &left, &length);
+        left_buffer = left->mutable_data();
+      } else {
+        left_buffer = nullptr;
+      }
 
       for (int64_t right_offset : offset_values) {
-        BitmapFromVector(right_bits, right_offset, &right, &length);
-
+        if (right_bits.size() > 0) {
+          BitmapFromVector(right_bits, right_offset, &right, &length);
+          right_buffer = right->mutable_data();
+        } else {
+          right_buffer = nullptr;
+        }
         for (int64_t out_offset : offset_values) {
           ASSERT_OK_AND_ASSIGN(
-              out, op.Call(default_memory_pool(), left->mutable_data(), left_offset,
-                           right->mutable_data(), right_offset, length, out_offset));
-          auto reader = internal::BitmapReader(out->mutable_data(), out_offset, length);
-          ASSERT_READER_VALUES(reader, result_bits);
-
-          // Clear out buffer and try non-allocating version
-          std::memset(out->mutable_data(), 0, out->size());
-          ASSERT_OK(op.Call(left->mutable_data(), left_offset, right->mutable_data(),
-                            right_offset, length, out_offset, out->mutable_data()));
-          reader = internal::BitmapReader(out->mutable_data(), out_offset, length);
-          ASSERT_READER_VALUES(reader, result_bits);
+              out, op.Call(default_memory_pool(), left_buffer, left_offset, right_buffer,
+                           right_offset, length, out_offset));
+          if (out == nullptr) {
+            ASSERT_EQ(std::vector<int>{}, result_bits);
+            // TODO: This has to test the case of non-allocating buffer
+          } else {
+            auto reader = internal::BitmapReader(out->mutable_data(), out_offset, length);
+            ASSERT_READER_VALUES(reader, result_bits);
+
+            // Clear out buffer and try non-allocating version
+            std::memset(out->mutable_data(), 0, out->size());
+            ASSERT_OK(op.Call(left_buffer, left_offset, right_buffer, right_offset,
+                              length, out_offset, out->mutable_data()));
+            reader = internal::BitmapReader(out->mutable_data(), out_offset, length);
+            ASSERT_READER_VALUES(reader, result_bits);
+          }
         }
       }
     }
   }
 };
 
+TEST_F(BitmapOp, OptionalAnd) {
+  OptionalBitmapAndOp op;
+  std::vector<int> left = {0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1};
+  std::vector<int> right = {0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0};
+  std::vector<int> result = {0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0};
+
+  // Both bitmaps present: behaves like a plain AND.
+  TestAligned(op, left, right, result);
+  TestUnaligned(op, left, right, result);
+
+  // An empty vector makes the harness pass a null buffer for that side; the
+  // expected output is then the other bitmap, or no buffer when both are null.
+  TestAligned(op, {}, right, right);
+  TestUnaligned(op, {}, right, right);
+  TestAligned(op, left, {}, left);
+  TestUnaligned(op, left, {}, left);
+  TestAligned(op, {}, {}, {});
+  TestUnaligned(op, {}, {}, {});
+}
+
 TEST_F(BitmapOp, And) {
   BitmapAndOp op;
   std::vector<int> left = {0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1};
diff --git a/cpp/src/arrow/util/bitmap_ops.cc b/cpp/src/arrow/util/bitmap_ops.cc
index d17b477e4c9ae..14632335c0b61 100644
--- a/cpp/src/arrow/util/bitmap_ops.cc
+++ b/cpp/src/arrow/util/bitmap_ops.cc
@@ -394,6 +394,41 @@ Result<std::shared_ptr<Buffer>> BitmapOp(MemoryPool* pool, const uint8_t* left,
 
 }  // namespace
 
+Result<std::shared_ptr<Buffer>> OptionalBitmapAnd(MemoryPool* pool, const uint8_t* left,
+                                                  int64_t left_offset,
+                                                  const uint8_t* right,
+                                                  int64_t right_offset, int64_t length,
+                                                  int64_t out_offset) {
+  if (left == nullptr && right == nullptr) {
+    return nullptr;
+  } else if (left == nullptr) {
+    return CopyBitmap(pool, right, right_offset, length, out_offset);
+  } else if (right == nullptr) {
+    return CopyBitmap(pool, left, left_offset, length, out_offset);
+  } else {
+    return BitmapOp<std::bit_and>(pool, left, left_offset, right, right_offset, length,
+                                  out_offset);
+  }
+}
+
+void OptionalBitmapAnd(const uint8_t* left, int64_t left_offset, const uint8_t* right,
+                       int64_t right_offset, int64_t length, int64_t out_offset,
+                       uint8_t* out) {
+  if (left == nullptr && right == nullptr) {
+    // Nothing to combine; `out` is intentionally left untouched.
+    return;
+  } else if (left == nullptr) {
+    // Only `right` is present: copy its bits into `out` at `out_offset`.
+    CopyBitmap(right, right_offset, length, out, out_offset);
+  } else if (right == nullptr) {
+    // Only `left` is present: copy its bits into `out` at `out_offset`.
+    CopyBitmap(left, left_offset, length, out, out_offset);
+  } else {
+    BitmapOp<std::bit_and>(left, left_offset, right, right_offset, length, out_offset,
+                           out);
+  }
+}
+
 Result<std::shared_ptr<Buffer>> BitmapAnd(MemoryPool* pool, const uint8_t* left,
                                           int64_t left_offset, const uint8_t* right,
                                           int64_t right_offset, int64_t length,
diff --git a/cpp/src/arrow/util/bitmap_ops.h b/cpp/src/arrow/util/bitmap_ops.h
index ac05bc87b3073..f01a66f0e02cf 100644
--- a/cpp/src/arrow/util/bitmap_ops.h
+++ b/cpp/src/arrow/util/bitmap_ops.h
@@ -147,6 +147,28 @@ bool OptionalBitmapEquals(const std::shared_ptr<Buffer>& left, int64_t left_offs
                           const std::shared_ptr<Buffer>& right, int64_t right_offset,
                           int64_t length);
 
+/// \brief Do a "bitmap and" on right and left buffers starting at
+/// their respective bit-offsets for the given bit-length and put
+/// the results in out_buffer starting at the given bit-offset.
+/// Both right and left buffers are optional. If one of the buffers is
+/// null the non-null bitmap is returned. If both are null a nullptr is returned.
+ARROW_EXPORT
+Result<std::shared_ptr<Buffer>> OptionalBitmapAnd(MemoryPool* pool, const uint8_t* left,
+                                                  int64_t left_offset,
+                                                  const uint8_t* right,
+                                                  int64_t right_offset, int64_t length,
+                                                  int64_t out_offset);
+
+/// \brief Do a "bitmap and" on right and left buffers starting at
+/// their respective bit-offsets for the given bit-length and put
+/// the results in out starting at the given bit-offset.
+/// Both right and left buffers are optional. If one of the buffers is
+/// null the non-null bitmap is copied. If both are null out is not modified.
+ARROW_EXPORT
+void OptionalBitmapAnd(const uint8_t* left, int64_t left_offset, const uint8_t* right,
+                       int64_t right_offset, int64_t length, int64_t out_offset,
+                       uint8_t* out);
+
 /// \brief Do a "bitmap and" on right and left buffers starting at
 /// their respective bit-offsets for the given bit-length and put
 /// the results in out_buffer starting at the given bit-offset.