Skip to content

Commit 2f92a5b

Browse files
committed
apacheGH-44846: [C++] Fix thread-unsafe access in ConcurrentQueue::UnsyncFront
1 parent 71389f8 commit 2f92a5b

File tree

3 files changed

+18
-17
lines changed

3 files changed

+18
-17
lines changed

cpp/src/arrow/acero/asof_join_node.cc

+4-5
Original file line numberDiff line numberDiff line change
@@ -567,7 +567,7 @@ class InputState : public util::SerialSequencingQueue::Processor {
567567

568568
// Gets latest batch (precondition: must not be empty)
569569
const std::shared_ptr<arrow::RecordBatch>& GetLatestBatch() const {
570-
return queue_.UnsyncFront();
570+
return queue_.Front();
571571
}
572572

573573
#define LATEST_VAL_CASE(id, val) \
@@ -634,15 +634,14 @@ class InputState : public util::SerialSequencingQueue::Processor {
634634
}
635635
latest_time_ = next_time;
636636
// If we have an active batch
637-
if (++latest_ref_row_ >= (row_index_t)queue_.UnsyncFront()->num_rows()) {
637+
if (++latest_ref_row_ >= (row_index_t)queue_.Front()->num_rows()) {
638638
// hit the end of the batch, need to get the next batch if possible.
639639
++batches_processed_;
640640
latest_ref_row_ = 0;
641641
have_active_batch &= !queue_.TryPop();
642642
if (have_active_batch) {
643-
DCHECK_GT(queue_.UnsyncFront()->num_rows(), 0); // empty batches disallowed
644-
memo_.UpdateTime(GetTime(queue_.UnsyncFront().get(), time_type_id_,
645-
time_col_index_,
643+
DCHECK_GT(queue_.Front()->num_rows(), 0); // empty batches disallowed
644+
memo_.UpdateTime(GetTime(queue_.Front().get(), time_type_id_, time_col_index_,
646645
0)); // time changed
647646
}
648647
}

cpp/src/arrow/acero/concurrent_queue_internal.h

+11-9
Original file line numberDiff line numberDiff line change
@@ -65,17 +65,19 @@ class ConcurrentQueue {
6565
return queue_.empty();
6666
}
6767

68-
// Un-synchronized access to front
69-
// For this to be "safe":
70-
// 1) the caller logically guarantees that queue is not empty
71-
// 2) pop/try_pop cannot be called concurrently with this
72-
const T& UnsyncFront() const { return queue_.front(); }
73-
74-
size_t UnsyncSize() const { return queue_.size(); }
68+
const T& Front() const {
69+
// Need to lock the queue because `front()` may be implemented in terms
70+
// of `begin()`, which isn't safe with concurrent calls to e.g. `push()`.
71+
// (see GH-44846)
72+
std::unique_lock<std::mutex> lock(mutex_);
73+
return queue_.front();
74+
}
7575

7676
protected:
7777
std::mutex& GetMutex() { return mutex_; }
7878

79+
size_t SizeUnlocked() const { return queue_.size(); }
80+
7981
T PopUnlocked() {
8082
auto item = queue_.front();
8183
queue_.pop();
@@ -111,12 +113,12 @@ class BackpressureConcurrentQueue : public ConcurrentQueue<T> {
111113
private:
112114
struct DoHandle {
113115
explicit DoHandle(BackpressureConcurrentQueue& queue)
114-
: queue_(queue), start_size_(queue_.UnsyncSize()) {}
116+
: queue_(queue), start_size_(queue_.SizeUnlocked()) {}
115117

116118
~DoHandle() {
117119
// unsynced access is safe since DoHandle is internally only used when the
118120
// lock is held
119-
size_t end_size = queue_.UnsyncSize();
121+
size_t end_size = queue_.SizeUnlocked();
120122
queue_.handler_.Handle(start_size_, end_size);
121123
}
122124

cpp/src/arrow/acero/sorted_merge_node.cc

+3-3
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ class InputState {
145145

146146
// Gets latest batch (precondition: must not be empty)
147147
const std::shared_ptr<arrow::RecordBatch>& GetLatestBatch() const {
148-
return queue_.UnsyncFront();
148+
return queue_.Front();
149149
}
150150

151151
#define LATEST_VAL_CASE(id, val) \
@@ -178,7 +178,7 @@ class InputState {
178178
row_index_t start = latest_ref_row_;
179179
row_index_t end = latest_ref_row_;
180180
time_unit_t startTime = GetLatestTime();
181-
std::shared_ptr<arrow::RecordBatch> batch = queue_.UnsyncFront();
181+
std::shared_ptr<arrow::RecordBatch> batch = queue_.Front();
182182
auto rows_in_batch = (row_index_t)batch->num_rows();
183183

184184
while (GetLatestTime() == startTime) {
@@ -190,7 +190,7 @@ class InputState {
190190
latest_ref_row_ = 0;
191191
active &= !queue_.TryPop();
192192
if (active) {
193-
DCHECK_GT(queue_.UnsyncFront()->num_rows(),
193+
DCHECK_GT(queue_.Front()->num_rows(),
194194
0); // empty batches disallowed, sanity check
195195
}
196196
break;

0 commit comments

Comments
 (0)