Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-45819: [C++] Add OptionalBitmapAnd utility function #45869

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 73 additions & 15 deletions cpp/src/arrow/util/bit_util_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ using internal::BitsetStack;
using internal::CopyBitmap;
using internal::CountSetBits;
using internal::InvertBitmap;
using internal::OptionalBitmapAnd;
using internal::ReverseBitmap;
using util::SafeCopy;

Expand Down Expand Up @@ -1272,6 +1273,24 @@ struct BitmapOperation {
virtual ~BitmapOperation() = default;
};

// Adapter that exposes both OptionalBitmapAnd overloads through the
// BitmapOperation interface, so the fixture helpers can drive them via
// op.Call(...).
struct OptionalBitmapAndOp : public BitmapOperation {
  // In-place variant: writes the result into the caller-supplied out_buffer.
  // The underlying overload returns void, so success is always reported.
  Status Call(const uint8_t* left, int64_t left_offset, const uint8_t* right,
              int64_t right_offset, int64_t length, int64_t out_offset,
              uint8_t* out_buffer) const override {
    OptionalBitmapAnd(left, left_offset, right, right_offset, length, out_offset,
                      out_buffer);
    return Status::OK();
  }

  // Allocating variant: forwards to the pool-taking overload and hands its
  // Result back to the caller unchanged.
  Result<std::shared_ptr<Buffer>> Call(MemoryPool* pool, const uint8_t* left,
                                       int64_t left_offset, const uint8_t* right,
                                       int64_t right_offset, int64_t length,
                                       int64_t out_offset) const override {
    return OptionalBitmapAnd(pool, left, left_offset, right, right_offset, length,
                             out_offset);
  }
};

struct BitmapAndOp : public BitmapOperation {
Result<std::shared_ptr<Buffer>> Call(MemoryPool* pool, const uint8_t* left,
int64_t left_offset, const uint8_t* right,
Expand Down Expand Up @@ -1342,23 +1361,34 @@ class BitmapOp : public ::testing::Test {
const std::vector<int>& right_bits,
const std::vector<int>& result_bits) {
std::shared_ptr<Buffer> left, right, out;
int64_t length;
int64_t length{0};
uint8_t *left_buffer, *right_buffer;

for (int64_t left_offset : {0, 1, 3, 5, 7, 8, 13, 21, 38, 75, 120, 65536}) {
BitmapFromVector(left_bits, left_offset, &left, &length);
if (left_bits.size() > 0) {
BitmapFromVector(left_bits, left_offset, &left, &length);
left_buffer = left->mutable_data();
} else {
left_buffer = nullptr;
}
for (int64_t right_offset : {left_offset, left_offset + 8, left_offset + 40}) {
BitmapFromVector(right_bits, right_offset, &right, &length);
if (right_bits.size() > 0) {
BitmapFromVector(right_bits, right_offset, &right, &length);
right_buffer = right->mutable_data();
} else {
right_buffer = nullptr;
}
for (int64_t out_offset : {left_offset, left_offset + 16, left_offset + 24}) {
ASSERT_OK_AND_ASSIGN(
out, op.Call(default_memory_pool(), left->mutable_data(), left_offset,
right->mutable_data(), right_offset, length, out_offset));
out, op.Call(default_memory_pool(), left_buffer, left_offset, right_buffer,
right_offset, length, out_offset));
auto reader = internal::BitmapReader(out->mutable_data(), out_offset, length);
ASSERT_READER_VALUES(reader, result_bits);

// Clear out buffer and try non-allocating version
std::memset(out->mutable_data(), 0, out->size());
ASSERT_OK(op.Call(left->mutable_data(), left_offset, right->mutable_data(),
right_offset, length, out_offset, out->mutable_data()));
ASSERT_OK(op.Call(left_buffer, left_offset, right_buffer, right_offset, length,
out_offset, out->mutable_data()));
reader = internal::BitmapReader(out->mutable_data(), out_offset, length);
ASSERT_READER_VALUES(reader, result_bits);
}
Expand All @@ -1370,26 +1400,36 @@ class BitmapOp : public ::testing::Test {
const std::vector<int>& right_bits,
const std::vector<int>& result_bits) {
std::shared_ptr<Buffer> left, right, out;
int64_t length;
int64_t length{0};
uint8_t *left_buffer, *right_buffer;
auto offset_values = {0, 1, 3, 5, 7, 8, 13, 21, 38, 75, 120, 65536};

for (int64_t left_offset : offset_values) {
BitmapFromVector(left_bits, left_offset, &left, &length);
if (left_bits.size() > 0) {
BitmapFromVector(left_bits, left_offset, &left, &length);
left_buffer = left->mutable_data();
} else {
left_buffer = nullptr;
}

for (int64_t right_offset : offset_values) {
BitmapFromVector(right_bits, right_offset, &right, &length);

if (right_bits.size() > 0) {
BitmapFromVector(right_bits, right_offset, &right, &length);
right_buffer = right->mutable_data();
} else {
right_buffer = nullptr;
}
for (int64_t out_offset : offset_values) {
ASSERT_OK_AND_ASSIGN(
out, op.Call(default_memory_pool(), left->mutable_data(), left_offset,
right->mutable_data(), right_offset, length, out_offset));
out, op.Call(default_memory_pool(), left_buffer, left_offset, right_buffer,
right_offset, length, out_offset));
auto reader = internal::BitmapReader(out->mutable_data(), out_offset, length);
ASSERT_READER_VALUES(reader, result_bits);

// Clear out buffer and try non-allocating version
std::memset(out->mutable_data(), 0, out->size());
ASSERT_OK(op.Call(left->mutable_data(), left_offset, right->mutable_data(),
right_offset, length, out_offset, out->mutable_data()));
ASSERT_OK(op.Call(left_buffer, left_offset, right_buffer, right_offset, length,
out_offset, out->mutable_data()));
reader = internal::BitmapReader(out->mutable_data(), out_offset, length);
ASSERT_READER_VALUES(reader, result_bits);
}
Expand All @@ -1398,6 +1438,24 @@ class BitmapOp : public ::testing::Test {
}
};

// Exercises OptionalBitmapAnd through the fixture helpers: first with two real
// bitmaps, then with one or both inputs absent. An empty input vector makes the
// helpers pass a null pointer for that side.
TEST_F(BitmapOp, OptionalAnd) {
  OptionalBitmapAndOp op;
  const std::vector<int> lhs = {0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1};
  const std::vector<int> rhs = {0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0};
  const std::vector<int> both_set = {0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0};
  // Expected output whenever an input is missing: one zero per input bit.
  const std::vector<int> all_clear(lhs.size(), 0);
  const std::vector<int> absent;

  // Runs the aligned pass followed by the unaligned pass for one scenario.
  const auto check = [&](const std::vector<int>& l, const std::vector<int>& r,
                         const std::vector<int>& expected) {
    TestAligned(op, l, r, expected);
    TestUnaligned(op, l, r, expected);
  };

  check(lhs, rhs, both_set);
  check(absent, rhs, all_clear);
  check(lhs, absent, all_clear);
  check(absent, absent, absent);
}

TEST_F(BitmapOp, And) {
BitmapAndOp op;
std::vector<int> left = {0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1};
Expand Down
26 changes: 26 additions & 0 deletions cpp/src/arrow/util/bitmap_ops.cc
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,32 @@ Result<std::shared_ptr<Buffer>> BitmapOp(MemoryPool* pool, const uint8_t* left,

} // namespace

// Allocating bitwise AND that tolerates missing inputs.
//
// When both `left` and `right` are non-null this defers to
// BitmapOp<std::bit_and>. When either is null, no AND is performed and a
// freshly allocated empty bitmap covering `length + out_offset` bits is
// returned instead (documented in the header as a bitmap of zeros).
Result<std::shared_ptr<Buffer>> OptionalBitmapAnd(MemoryPool* pool, const uint8_t* left,
                                                  int64_t left_offset,
                                                  const uint8_t* right,
                                                  int64_t right_offset, int64_t length,
                                                  int64_t out_offset) {
  const bool both_present = (left != nullptr) && (right != nullptr);
  if (both_present) {
    return BitmapOp<std::bit_and>(pool, left, left_offset, right, right_offset, length,
                                  out_offset);
  }

  // The buffer must span the leading out_offset bits as well as the payload.
  const int64_t phys_bits = length + out_offset;
  ARROW_ASSIGN_OR_RAISE(auto out_buffer, AllocateEmptyBitmap(phys_bits, pool));
  return out_buffer;
}

// In-place bitwise AND that tolerates missing inputs.
//
// When both `left` and `right` are non-null this defers to
// BitmapOp<std::bit_and>, writing into `out` at `out_offset`. When either is
// null, the `length` bits of `out` starting at `out_offset` are cleared via
// bit_util::ClearBitmap and the inputs are not read.
void OptionalBitmapAnd(const uint8_t* left, int64_t left_offset, const uint8_t* right,
                       int64_t right_offset, int64_t length, int64_t out_offset,
                       uint8_t* out) {
  const bool both_present = (left != nullptr) && (right != nullptr);
  if (!both_present) {
    bit_util::ClearBitmap(out, out_offset, length);
    return;
  }
  BitmapOp<std::bit_and>(left, left_offset, right, right_offset, length, out_offset,
                         out);
}

Result<std::shared_ptr<Buffer>> BitmapAnd(MemoryPool* pool, const uint8_t* left,
int64_t left_offset, const uint8_t* right,
int64_t right_offset, int64_t length,
Expand Down
25 changes: 25 additions & 0 deletions cpp/src/arrow/util/bitmap_ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,31 @@ bool OptionalBitmapEquals(const std::shared_ptr<Buffer>& left, int64_t left_offs
const std::shared_ptr<Buffer>& right, int64_t right_offset,
int64_t length);

/// \brief Do a "bitmap and" on right and left buffers starting at
/// their respective bit-offsets for the given bit-length and put
/// the results in out_buffer starting at the given bit-offset.
/// Both right and left buffers are optional. If either buffer is
/// null, a bitmap of all zeros is returned.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, actually, the more useful semantics is that a null pointer means the bitmap is all-1s.

This reflects the situation where an Array doesn't have a null bitmap: all values are valid.

Copy link
Member

@pitrou pitrou Mar 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And this would be even more useful as:

Result<std::shared_ptr<Buffer>> OptionalBitmapAnd(
    MemoryPool* pool, const std::shared_ptr<Buffer>& left, int64_t left_offset,
    const std::shared_ptr<Buffer>& right, int64_t right_offset, int64_t out_offset);

... because then, if one of the inputs is null, and the offsets are compatible, we can return the other input (perhaps sliced) instead of allocating a new buffer.

///
/// out_buffer will be allocated and initialized to zeros using pool before
/// the operation. When either input is null, the allocation covers
/// `length + out_offset` bits and no AND is performed.
///
/// \return the allocated output buffer, or an error if allocating it failed
ARROW_EXPORT
Result<std::shared_ptr<Buffer>> OptionalBitmapAnd(MemoryPool* pool, const uint8_t* left,
int64_t left_offset,
const uint8_t* right,
int64_t right_offset, int64_t length,
int64_t out_offset);

/// \brief Do a "bitmap and" on right and left buffers starting at
/// their respective bit-offsets for the given bit-length and put
/// the results in out starting at the given bit-offset.
/// Both right and left buffers are optional. If either buffer is
/// null, the inputs are not read and the `length` bits of out starting
/// at out_offset are cleared (set to zero) instead.
///
/// Nothing is returned; out must be supplied by the caller.
ARROW_EXPORT
void OptionalBitmapAnd(const uint8_t* left, int64_t left_offset, const uint8_t* right,
int64_t right_offset, int64_t length, int64_t out_offset,
uint8_t* out);

/// \brief Do a "bitmap and" on right and left buffers starting at
/// their respective bit-offsets for the given bit-length and put
/// the results in out_buffer starting at the given bit-offset.
Expand Down
Loading