Skip to content

GH-45819: [C++] Add OptionalBitmapAnd utility function #45869

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
147 changes: 118 additions & 29 deletions cpp/src/arrow/util/bit_util_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ using internal::BitsetStack;
using internal::CopyBitmap;
using internal::CountSetBits;
using internal::InvertBitmap;
using internal::OptionalBitmapAnd;
using internal::ReverseBitmap;
using util::SafeCopy;

Expand Down Expand Up @@ -1272,6 +1273,32 @@ struct BitmapOperation {
virtual ~BitmapOperation() = default;
};

struct OptionalBitmapAndOp : public BitmapOperation {
Result<std::shared_ptr<Buffer>> Call(MemoryPool* pool, const uint8_t* left,
int64_t left_offset, const uint8_t* right,
int64_t right_offset, int64_t length,
int64_t out_offset) const override {
std::shared_ptr<Buffer> left_buffer, right_buffer;
if (right != nullptr) {
ARROW_ASSIGN_OR_RAISE(right_buffer,
CopyBitmap(pool, right, 0, length + right_offset));
}
if (left != nullptr) {
ARROW_ASSIGN_OR_RAISE(left_buffer, CopyBitmap(pool, left, 0, length + left_offset));
}

return OptionalBitmapAnd(pool, left_buffer, left_offset, right_buffer, right_offset,
length, out_offset);
}

Status Call(const uint8_t* left, int64_t left_offset, const uint8_t* right,
int64_t right_offset, int64_t length, int64_t out_offset,
uint8_t* out_buffer) const override {
return Status::NotImplemented(
"OptionalBitmapAnd does not implement non-allocation buffer version");
}
};

struct BitmapAndOp : public BitmapOperation {
Result<std::shared_ptr<Buffer>> Call(MemoryPool* pool, const uint8_t* left,
int64_t left_offset, const uint8_t* right,
Expand Down Expand Up @@ -1342,25 +1369,48 @@ class BitmapOp : public ::testing::Test {
const std::vector<int>& right_bits,
const std::vector<int>& result_bits) {
std::shared_ptr<Buffer> left, right, out;
int64_t length;
int64_t length{0};
uint8_t *left_data, *right_data;

for (int64_t left_offset : {0, 1, 3, 5, 7, 8, 13, 21, 38, 75, 120, 65536}) {
BitmapFromVector(left_bits, left_offset, &left, &length);
if (left_bits.size() > 0) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is putting too much logic in these test methods. I would suggest something else:

  1. test OptionalBitmapAnd with both non-null arguments like you do here
  2. have separate tests for when one or both of the arguments is null

BitmapFromVector(left_bits, left_offset, &left, &length);
left_data = left->mutable_data();
} else {
left_data = nullptr;
}
for (int64_t right_offset : {left_offset, left_offset + 8, left_offset + 40}) {
BitmapFromVector(right_bits, right_offset, &right, &length);
if (right_bits.size() > 0) {
BitmapFromVector(right_bits, right_offset, &right, &length);
right_data = right->mutable_data();
} else {
right_data = nullptr;
}
for (int64_t out_offset : {left_offset, left_offset + 16, left_offset + 24}) {
ASSERT_OK_AND_ASSIGN(
out, op.Call(default_memory_pool(), left->mutable_data(), left_offset,
right->mutable_data(), right_offset, length, out_offset));
auto reader = internal::BitmapReader(out->mutable_data(), out_offset, length);
ASSERT_READER_VALUES(reader, result_bits);

// Clear out buffer and try non-allocating version
std::memset(out->mutable_data(), 0, out->size());
ASSERT_OK(op.Call(left->mutable_data(), left_offset, right->mutable_data(),
right_offset, length, out_offset, out->mutable_data()));
reader = internal::BitmapReader(out->mutable_data(), out_offset, length);
ASSERT_READER_VALUES(reader, result_bits);
out, op.Call(default_memory_pool(), left_data, left_offset, right_data,
right_offset, length, out_offset));
if (out == nullptr) {
ASSERT_EQ(std::vector<int>{}, result_bits);
} else {
auto reader = internal::BitmapReader(out->mutable_data(), out_offset, length);
ASSERT_READER_VALUES(reader, result_bits);

// Clear out buffer and try non-allocating version
std::memset(out->mutable_data(), 0, out->size());
auto status = op.Call(left_data, left_offset, right_data, right_offset,
length, out_offset, out->mutable_data());
if (status.IsNotImplemented()) {
ASSERT_EQ(
status.message(),
"OptionalBitmapAnd does not implement non-allocation buffer version");
continue;
} else {
ASSERT_OK(status);
reader = internal::BitmapReader(out->mutable_data(), out_offset, length);
ASSERT_READER_VALUES(reader, result_bits);
}
}
}
}
}
Expand All @@ -1370,34 +1420,73 @@ class BitmapOp : public ::testing::Test {
const std::vector<int>& right_bits,
const std::vector<int>& result_bits) {
std::shared_ptr<Buffer> left, right, out;
int64_t length;
int64_t length{0};
uint8_t *left_data, *right_data;
auto offset_values = {0, 1, 3, 5, 7, 8, 13, 21, 38, 75, 120, 65536};

for (int64_t left_offset : offset_values) {
BitmapFromVector(left_bits, left_offset, &left, &length);
if (left_bits.size() > 0) {
BitmapFromVector(left_bits, left_offset, &left, &length);
left_data = left->mutable_data();
} else {
left_data = nullptr;
}

for (int64_t right_offset : offset_values) {
BitmapFromVector(right_bits, right_offset, &right, &length);

if (right_bits.size() > 0) {
BitmapFromVector(right_bits, right_offset, &right, &length);
right_data = right->mutable_data();
} else {
right_data = nullptr;
}
for (int64_t out_offset : offset_values) {
ASSERT_OK_AND_ASSIGN(
out, op.Call(default_memory_pool(), left->mutable_data(), left_offset,
right->mutable_data(), right_offset, length, out_offset));
auto reader = internal::BitmapReader(out->mutable_data(), out_offset, length);
ASSERT_READER_VALUES(reader, result_bits);

// Clear out buffer and try non-allocating version
std::memset(out->mutable_data(), 0, out->size());
ASSERT_OK(op.Call(left->mutable_data(), left_offset, right->mutable_data(),
right_offset, length, out_offset, out->mutable_data()));
reader = internal::BitmapReader(out->mutable_data(), out_offset, length);
ASSERT_READER_VALUES(reader, result_bits);
out, op.Call(default_memory_pool(), left_data, left_offset, right_data,
right_offset, length, out_offset));
if (out == nullptr) {
ASSERT_EQ(std::vector<int>{}, result_bits);
} else {
auto reader = internal::BitmapReader(out->mutable_data(), out_offset, length);
ASSERT_READER_VALUES(reader, result_bits);

// Clear out buffer and try non-allocating version
std::memset(out->mutable_data(), 0, out->size());
auto status = op.Call(left_data, left_offset, right_data, right_offset,
length, out_offset, out->mutable_data());
if (status.IsNotImplemented()) {
ASSERT_EQ(
status.message(),
"OptionalBitmapAnd does not implement non-allocation buffer version");
continue;
} else {
ASSERT_OK(status);
reader = internal::BitmapReader(out->mutable_data(), out_offset, length);
ASSERT_READER_VALUES(reader, result_bits);
}
}
}
}
}
}
};

TEST_F(BitmapOp, OptionalAnd) {
OptionalBitmapAndOp op;
std::vector<int> left = {0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1};
std::vector<int> right = {0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0};
std::vector<int> result = {0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0};

TestAligned(op, left, right, result);
TestUnaligned(op, left, right, result);

TestAligned(op, {}, right, right);
TestUnaligned(op, {}, right, right);
TestAligned(op, left, {}, left);
TestUnaligned(op, left, {}, left);
TestAligned(op, {}, {}, {});
TestUnaligned(op, {}, {}, {});
}

TEST_F(BitmapOp, And) {
BitmapAndOp op;
std::vector<int> left = {0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1};
Expand Down
25 changes: 25 additions & 0 deletions cpp/src/arrow/util/bitmap_ops.cc
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,31 @@ Result<std::shared_ptr<Buffer>> BitmapOp(MemoryPool* pool, const uint8_t* left,

} // namespace

Result<std::shared_ptr<Buffer>> OptionalBitmapAnd(MemoryPool* pool,
const std::shared_ptr<Buffer>& left,
int64_t left_offset,
const std::shared_ptr<Buffer>& right,
int64_t right_offset, int64_t length,
int64_t out_offset) {
if (left == nullptr && right == nullptr) {
return nullptr;
} else if (left == nullptr) {
if (right_offset == out_offset) {
return right;
}
Comment on lines +406 to +408
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could easily slice the input buffer if both offsets are equal modulo 8. It would avoid copying in slightly more cases.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It could be a dedicated function ViewOrCopyBitmap by the way. Search through the source code would probably find other places where such a function could be useful.

return CopyBitmap(pool, right->data(), right_offset, length, out_offset);
} else if (right == nullptr) {
if (left_offset == out_offset) {
return left;
}
return CopyBitmap(pool, left->data(), left_offset, length, out_offset);
} else {
return BitmapOp<std::bit_and>(pool, left->mutable_data(), left_offset,
right->mutable_data(), right_offset, length,
out_offset);
}
}

Result<std::shared_ptr<Buffer>> BitmapAnd(MemoryPool* pool, const uint8_t* left,
int64_t left_offset, const uint8_t* right,
int64_t right_offset, int64_t length,
Expand Down
15 changes: 15 additions & 0 deletions cpp/src/arrow/util/bitmap_ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,21 @@ bool OptionalBitmapEquals(const std::shared_ptr<Buffer>& left, int64_t left_offs
const std::shared_ptr<Buffer>& right, int64_t right_offset,
int64_t length);

/// \brief Do a "bitmap and" on right and left buffers starting at
/// their respective bit-offsets for the given bit-length and return
/// the result buffer starting at the given bit-offset.
/// Both right and left buffers are optional. If one of the buffers is
/// null the non-null bitmap is returned if out_offset and input_offset are
/// compatible. If non-compatible a copy of the bitmap is performed.
// If both right and left are null a nullptr is returned.
ARROW_EXPORT
Result<std::shared_ptr<Buffer>> OptionalBitmapAnd(MemoryPool* pool,
const std::shared_ptr<Buffer>& left,
int64_t left_offset,
const std::shared_ptr<Buffer>& right,
int64_t right_offset, int64_t length,
int64_t out_offset);

/// \brief Do a "bitmap and" on right and left buffers starting at
/// their respective bit-offsets for the given bit-length and put
/// the results in out_buffer starting at the given bit-offset.
Expand Down
Loading