Skip to content

Commit 429d060

Browse files
committed
WIP
1 parent b6c4fbc commit 429d060

9 files changed

+274
-27
lines changed

cpp/src/arrow/io/file_test.cc

+5
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,11 @@ class MyMemoryPool : public MemoryPool {
461461
return Status::OK();
462462
}
463463

464+
// This test pool has no dedicated no-copy path: the request is handed
// unchanged to the pool's regular Reallocate() and its status is returned.
Status ReallocateNoCopy(int64_t old_size, int64_t new_size, int64_t alignment,
                        uint8_t** ptr) override {
  Status status = Reallocate(old_size, new_size, alignment, ptr);
  return status;
}
468+
464469
int64_t bytes_allocated() const override { return -1; }
465470

466471
int64_t total_bytes_allocated() const override { return -1; }

cpp/src/arrow/memory_pool.cc

+91
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,27 @@ class DebugAllocator {
261261
return Status::OK();
262262
}
263263

264+
static bool ResizeInPlace(int64_t old_size, int64_t new_size, uint8_t* ptr) {
265+
CheckAllocatedArea(ptr, old_size, "in-place expanding");
266+
if (old_size == 0 || new_size == 0) {
267+
// Cannot expand
268+
return false;
269+
}
270+
auto maybe_raw_new_size = RawSize(new_size);
271+
if (!maybe_raw_new_size.ok()) {
272+
return false;
273+
}
274+
int64_t raw_new_size = *maybe_raw_new_size;
275+
DCHECK(raw_new_size > new_size)
276+
<< "bug in raw size computation: " << raw_new_size << " for size " << new_size;
277+
bool success =
278+
WrappedAllocator::ResizeInPlace(old_size + kOverhead, raw_new_size, ptr);
279+
if (success) {
280+
InitAllocatedArea(ptr, new_size);
281+
}
282+
return success;
283+
}
284+
264285
static void DeallocateAligned(uint8_t* ptr, int64_t size, int64_t alignment) {
265286
CheckAllocatedArea(ptr, size, "deallocation");
266287
if (ptr != memory_pool::internal::kZeroSizeArea) {
@@ -363,6 +384,11 @@ class SystemAllocator {
363384
return Status::OK();
364385
}
365386

387+
// In-place resizing is never available with the system allocator: there is
// no standard C API for it, so every request is refused and the caller has
// to fall back to another strategy.
static bool ResizeInPlace(int64_t old_size, int64_t new_size, uint8_t* ptr) {
  constexpr bool kResizedInPlace = false;
  return kResizedInPlace;
}
391+
366392
static void DeallocateAligned(uint8_t* ptr, int64_t size, int64_t /*alignment*/) {
367393
if (ptr == memory_pool::internal::kZeroSizeArea) {
368394
DCHECK_EQ(size, 0);
@@ -425,6 +451,14 @@ class MimallocAllocator {
425451
return Status::OK();
426452
}
427453

454+
static bool ResizeInPlace(int64_t old_size, int64_t new_size, uint8_t* ptr) {
455+
if (old_size == 0 || new_size == 0) {
456+
// Cannot resize
457+
return false;
458+
}
459+
return mi_expand(ptr, static_cast<size_t>(new_size)) != nullptr;
460+
}
461+
428462
static void DeallocateAligned(uint8_t* ptr, int64_t size, int64_t /*alignment*/) {
429463
if (ptr == memory_pool::internal::kZeroSizeArea) {
430464
DCHECK_EQ(size, 0);
@@ -498,6 +532,43 @@ class BaseMemoryPoolImpl : public MemoryPool {
498532
return Status::OK();
499533
}
500534

535+
/// Resize an allocation without guaranteeing that its contents survive.
///
/// Strategy: first try to resize in place; if the allocator cannot, either
/// allocate a fresh area and release the old one (large sizes, no data copy)
/// or defer to the allocator's regular reallocation (small sizes).
///
/// @param old_size Size in bytes of the current allocation.
/// @param new_size Requested size in bytes.
/// @param alignment Alignment passed to the allocator.
/// @param ptr In: current allocation. Out: resized allocation. Left
///   untouched by the large-size fallback when the new allocation fails.
/// @return Status::Invalid if new_size is negative, Status::OutOfMemory if
///   new_size does not fit in size_t, or the allocator's error status.
Status ReallocateNoCopy(int64_t old_size, int64_t new_size, int64_t alignment,
                        uint8_t** ptr) override {
  // At or above this size the fallback path swaps allocations instead of
  // calling ReallocateAligned, so that no data has to be copied.
  // NOTE(review): the 32 kiB cutoff is taken as-is from the original code;
  // it should be validated with the reallocation benchmarks.
  constexpr int64_t kSwapAllocationThreshold = 32 * 1024;

  if (new_size == old_size) {
    return Status::OK();
  }
  if (new_size < 0) {
    return Status::Invalid("negative realloc size");
  }
  if (static_cast<uint64_t>(new_size) >= std::numeric_limits<size_t>::max()) {
    return Status::OutOfMemory("realloc overflows size_t");
  }
  // First try resizing in place
  if (!Allocator::ResizeInPlace(old_size, new_size, *ptr)) {
    if (std::max(old_size, new_size) >= kSwapAllocationThreshold) {
      // Allocate the new area *before* releasing the old one: if the
      // allocation fails, the caller still owns a valid `*ptr` of `old_size`
      // bytes and the statistics stay consistent. Releasing first would leave
      // a dangling pointer behind on failure.
      uint8_t* fresh_area = nullptr;
      RETURN_NOT_OK(Allocator::AllocateAligned(new_size, alignment, &fresh_area));
      Allocator::DeallocateAligned(*ptr, old_size, alignment);
      *ptr = fresh_area;
    } else {
      RETURN_NOT_OK(Allocator::ReallocateAligned(old_size, new_size, alignment, ptr));
    }
  }
#ifndef NDEBUG
  // Poison data: mark the first newly available byte (when growing) and the
  // last byte of the area.
  if (new_size > 0) {
    DCHECK_NE(*ptr, nullptr);
    if (new_size > old_size) {
      (*ptr)[old_size] = kReallocPoison;
    }
    (*ptr)[new_size - 1] = kReallocPoison;
  }
#endif

  stats_.UpdateAllocatedBytes(new_size - old_size);
  return Status::OK();
}
571+
501572
void Free(uint8_t* buffer, int64_t size, int64_t alignment) override {
502573
#ifndef NDEBUG
503574
// Poison data
@@ -721,6 +792,14 @@ Status LoggingMemoryPool::Reallocate(int64_t old_size, int64_t new_size,
721792
return s;
722793
}
723794

795+
// Forward a no-copy reallocation to the wrapped pool, then log the request
// parameters to stdout. The wrapped pool's status is returned unchanged.
Status LoggingMemoryPool::ReallocateNoCopy(int64_t old_size, int64_t new_size,
                                           int64_t alignment, uint8_t** ptr) {
  // The alignment must be forwarded explicitly: omitting it selects the
  // 3-argument convenience overload, which substitutes
  // kDefaultBufferAlignment for the caller's alignment.
  Status s = pool_->ReallocateNoCopy(old_size, new_size, alignment, ptr);
  std::cout << "ReallocateNoCopy: old_size = " << old_size << ", new_size = " << new_size
            << ", alignment = " << alignment << std::endl;
  return s;
}
802+
724803
void LoggingMemoryPool::Free(uint8_t* buffer, int64_t size, int64_t alignment) {
725804
pool_->Free(buffer, size, alignment);
726805
std::cout << "Free: size = " << size << ", alignment = " << alignment << std::endl;
@@ -772,6 +851,13 @@ class ProxyMemoryPool::ProxyMemoryPoolImpl {
772851
return Status::OK();
773852
}
774853

854+
// Forward the no-copy reallocation to the underlying pool. The proxy's own
// statistics are adjusted by the size difference only if the pool succeeded;
// on failure the pool's error status is returned and nothing is recorded.
Status ReallocateNoCopy(int64_t old_size, int64_t new_size, int64_t alignment,
                        uint8_t** ptr) {
  const int64_t size_delta = new_size - old_size;
  RETURN_NOT_OK(pool_->ReallocateNoCopy(old_size, new_size, alignment, ptr));
  stats_.UpdateAllocatedBytes(size_delta);
  return Status::OK();
}
860+
775861
void Free(uint8_t* buffer, int64_t size, int64_t alignment) {
776862
pool_->Free(buffer, size, alignment);
777863
stats_.UpdateAllocatedBytes(-size, /*is_free=*/true);
@@ -807,6 +893,11 @@ Status ProxyMemoryPool::Reallocate(int64_t old_size, int64_t new_size, int64_t a
807893
return impl_->Reallocate(old_size, new_size, alignment, ptr);
808894
}
809895

896+
// Public entry point: all the work is done by the implementation object,
// whose status is returned as-is.
Status ProxyMemoryPool::ReallocateNoCopy(int64_t old_size, int64_t new_size,
                                         int64_t alignment, uint8_t** ptr) {
  Status status = impl_->ReallocateNoCopy(old_size, new_size, alignment, ptr);
  return status;
}
900+
810901
void ProxyMemoryPool::Free(uint8_t* buffer, int64_t size, int64_t alignment) {
811902
return impl_->Free(buffer, size, alignment);
812903
}

cpp/src/arrow/memory_pool.h

+10
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,12 @@ class ARROW_EXPORT MemoryPool {
108108
return Reallocate(old_size, new_size, kDefaultBufferAlignment, ptr);
109109
}
110110

111+
/// Resize an already allocated memory section, without a guarantee that its
/// previous contents are kept.
///
/// NOTE(review): the "contents may be lost" contract is inferred from the
/// method name and from BaseMemoryPoolImpl, which may release the old area
/// and allocate a new one without copying -- confirm and state it formally.
///
/// @param old_size Current size in bytes of the allocation behind `*ptr`
/// @param new_size Requested size in bytes
/// @param alignment Required alignment of the resulting area
/// @param ptr Pointer to the allocation pointer; may be updated by the call
virtual Status ReallocateNoCopy(int64_t old_size, int64_t new_size, int64_t alignment,
112+
uint8_t** ptr) = 0;
113+
/// Same as above, using kDefaultBufferAlignment as the alignment.
Status ReallocateNoCopy(int64_t old_size, int64_t new_size, uint8_t** ptr) {
114+
return ReallocateNoCopy(old_size, new_size, kDefaultBufferAlignment, ptr);
115+
}
116+
111117
/// Free an allocated region.
112118
///
113119
/// @param buffer Pointer to the start of the allocated memory region
@@ -162,6 +168,8 @@ class ARROW_EXPORT LoggingMemoryPool : public MemoryPool {
162168
Status Allocate(int64_t size, int64_t alignment, uint8_t** out) override;
163169
Status Reallocate(int64_t old_size, int64_t new_size, int64_t alignment,
164170
uint8_t** ptr) override;
171+
Status ReallocateNoCopy(int64_t old_size, int64_t new_size, int64_t alignment,
172+
uint8_t** ptr) override;
165173
void Free(uint8_t* buffer, int64_t size, int64_t alignment) override;
166174

167175
int64_t bytes_allocated() const override;
@@ -194,6 +202,8 @@ class ARROW_EXPORT ProxyMemoryPool : public MemoryPool {
194202
Status Allocate(int64_t size, int64_t alignment, uint8_t** out) override;
195203
Status Reallocate(int64_t old_size, int64_t new_size, int64_t alignment,
196204
uint8_t** ptr) override;
205+
Status ReallocateNoCopy(int64_t old_size, int64_t new_size, int64_t alignment,
206+
uint8_t** ptr) override;
197207
void Free(uint8_t* buffer, int64_t size, int64_t alignment) override;
198208

199209
int64_t bytes_allocated() const override;

cpp/src/arrow/memory_pool_benchmark.cc

+93
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
#include "arrow/config.h" // for ARROW_JEMALLOC, ARROW_MIMALLOC
1819
#include "arrow/memory_pool.h"
1920
#include "arrow/result.h"
2021
#include "arrow/util/logging.h"
@@ -114,6 +115,76 @@ static void AllocateTouchDeallocate(
114115
state.SetBytesProcessed(state.iterations() * nbytes);
115116
}
116117

118+
template <typename Alloc, bool Copy>
119+
static void BenchmarkReallocateGrowing(benchmark::State& state) {
120+
// 256 kiB: typical max size for a scratch space (L2-sized)
121+
const int64_t max_size = 256 << 10;
122+
// 4 kiB: typical increment when resizing a scratch space
123+
const int64_t increment = 4096;
124+
MemoryPool* pool = *Alloc::GetAllocator();
125+
int64_t nb_reallocs = 0;
126+
127+
for (auto _ : state) {
128+
uint8_t* data;
129+
int64_t size = 0;
130+
ARROW_CHECK_OK(pool->Allocate(size, &data));
131+
for (; size < max_size; size += increment) {
132+
if constexpr (Copy) {
133+
ARROW_CHECK_OK(pool->Reallocate(size - increment, size, &data));
134+
} else {
135+
ARROW_CHECK_OK(pool->ReallocateNoCopy(size - increment, size, &data));
136+
}
137+
++nb_reallocs;
138+
}
139+
pool->Free(data, size - increment);
140+
}
141+
state.SetItemsProcessed(nb_reallocs);
142+
}
143+
144+
template <typename Alloc>
145+
static void ReallocateGrowing(benchmark::State& state) {
146+
BenchmarkReallocateGrowing<Alloc, /*Copy=*/true>(state);
147+
}
148+
149+
template <typename Alloc>
150+
static void ReallocateGrowingNoCopy(benchmark::State& state) {
151+
BenchmarkReallocateGrowing<Alloc, /*Copy=*/false>(state);
152+
}
153+
154+
template <typename Alloc, bool Copy>
155+
static void BenchmarkReallocateShrinking(benchmark::State& state) {
156+
const int64_t max_size = 256 << 10; // 256 kiB
157+
const int64_t increment = 4096;
158+
MemoryPool* pool = *Alloc::GetAllocator();
159+
int64_t nb_reallocs = 0;
160+
161+
for (auto _ : state) {
162+
uint8_t* data;
163+
int64_t size = max_size;
164+
ARROW_CHECK_OK(pool->Allocate(size, &data));
165+
for (; size >= 0; size -= increment) {
166+
if constexpr (Copy) {
167+
ARROW_CHECK_OK(pool->Reallocate(size + increment, size, &data));
168+
} else {
169+
ARROW_CHECK_OK(pool->ReallocateNoCopy(size + increment, size, &data));
170+
}
171+
++nb_reallocs;
172+
}
173+
pool->Free(data, size + increment);
174+
}
175+
state.SetItemsProcessed(nb_reallocs);
176+
}
177+
178+
template <typename Alloc>
179+
static void ReallocateShrinking(benchmark::State& state) {
180+
BenchmarkReallocateShrinking<Alloc, /*Copy=*/true>(state);
181+
}
182+
183+
template <typename Alloc>
184+
static void ReallocateShrinkingNoCopy(benchmark::State& state) {
185+
BenchmarkReallocateShrinking<Alloc, /*Copy=*/false>(state);
186+
}
187+
117188
#define BENCHMARK_ALLOCATE_ARGS \
118189
->RangeMultiplier(16)->Range(4096, 16 * 1024 * 1024)->ArgName("size")->UseRealTime()
119190

@@ -135,4 +206,26 @@ BENCHMARK_ALLOCATE(AllocateDeallocate, Mimalloc);
135206
BENCHMARK_ALLOCATE(AllocateTouchDeallocate, Mimalloc);
136207
#endif
137208

209+
// Register the growing-buffer benchmarks (copying and no-copy variants) for
// each allocator. The jemalloc and mimalloc registrations are only compiled
// when ARROW_JEMALLOC / ARROW_MIMALLOC are defined.
BENCHMARK_TEMPLATE(ReallocateGrowing, SystemAlloc);
210+
BENCHMARK_TEMPLATE(ReallocateGrowingNoCopy, SystemAlloc);
211+
#ifdef ARROW_JEMALLOC
212+
BENCHMARK_TEMPLATE(ReallocateGrowing, Jemalloc);
213+
BENCHMARK_TEMPLATE(ReallocateGrowingNoCopy, Jemalloc);
214+
#endif
215+
#ifdef ARROW_MIMALLOC
216+
BENCHMARK_TEMPLATE(ReallocateGrowing, Mimalloc);
217+
BENCHMARK_TEMPLATE(ReallocateGrowingNoCopy, Mimalloc);
218+
#endif
219+
220+
// Same registrations for the shrinking-buffer benchmarks.
BENCHMARK_TEMPLATE(ReallocateShrinking, SystemAlloc);
221+
BENCHMARK_TEMPLATE(ReallocateShrinkingNoCopy, SystemAlloc);
222+
#ifdef ARROW_JEMALLOC
223+
BENCHMARK_TEMPLATE(ReallocateShrinking, Jemalloc);
224+
BENCHMARK_TEMPLATE(ReallocateShrinkingNoCopy, Jemalloc);
225+
#endif
226+
#ifdef ARROW_MIMALLOC
227+
BENCHMARK_TEMPLATE(ReallocateShrinking, Mimalloc);
228+
BENCHMARK_TEMPLATE(ReallocateShrinkingNoCopy, Mimalloc);
229+
#endif
230+
138231
} // namespace arrow

cpp/src/arrow/memory_pool_internal.h

+1
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ class JemallocAllocator {
4242
static Status AllocateAligned(int64_t size, int64_t alignment, uint8_t** out);
4343
static Status ReallocateAligned(int64_t old_size, int64_t new_size, int64_t alignment,
4444
uint8_t** ptr);
45+
static bool ResizeInPlace(int64_t old_size, int64_t new_size, uint8_t* ptr);
4546
static void DeallocateAligned(uint8_t* ptr, int64_t size, int64_t alignment);
4647
static void ReleaseUnused();
4748
};

cpp/src/arrow/memory_pool_jemalloc.cc

+11
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,17 @@ Status JemallocAllocator::ReallocateAligned(int64_t old_size, int64_t new_size,
118118
return Status::OK();
119119
}
120120

121+
bool JemallocAllocator::ResizeInPlace(int64_t old_size, int64_t new_size, uint8_t* ptr) {
122+
if (old_size == 0 || new_size == 0) {
123+
// Cannot resize
124+
return false;
125+
}
126+
// No need to pass any alignment since this doesn't move the base pointer
127+
int64_t got_size = static_cast<int64_t>(xallocx(ptr, static_cast<size_t>(new_size),
128+
/*extra=*/0, /*flags=*/0));
129+
return got_size == new_size;
130+
}
131+
121132
void JemallocAllocator::DeallocateAligned(uint8_t* ptr, int64_t size, int64_t alignment) {
122133
if (ptr == kZeroSizeArea) {
123134
DCHECK_EQ(size, 0);

cpp/src/arrow/memory_pool_test.cc

+18-17
Original file line numberDiff line numberDiff line change
@@ -63,32 +63,33 @@ class TestMemoryPool : public ::arrow::TestMemoryPoolBase {
6363
MemoryPool* memory_pool() override { return Factory::memory_pool(); }
6464
};
6565

66-
TYPED_TEST_SUITE_P(TestMemoryPool);
66+
using MemoryPoolFactories =
67+
::testing::Types<DefaultMemoryPoolFactory, SystemMemoryPoolFactory
68+
#ifdef ARROW_JEMALLOC
69+
,
70+
JemallocMemoryPoolFactory
71+
#endif
72+
#ifdef ARROW_MIMALLOC
73+
,
74+
MimallocMemoryPoolFactory
75+
#endif
76+
>;
77+
78+
TYPED_TEST_SUITE(TestMemoryPool, MemoryPoolFactories);
6779

68-
TYPED_TEST_P(TestMemoryPool, MemoryTracking) { this->TestMemoryTracking(); }
80+
TYPED_TEST(TestMemoryPool, MemoryTracking) { this->TestMemoryTracking(); }
6981

70-
TYPED_TEST_P(TestMemoryPool, OOM) {
82+
TYPED_TEST(TestMemoryPool, OOM) {
7183
#ifndef ADDRESS_SANITIZER
7284
this->TestOOM();
7385
#endif
7486
}
7587

76-
TYPED_TEST_P(TestMemoryPool, Reallocate) { this->TestReallocate(); }
77-
78-
TYPED_TEST_P(TestMemoryPool, Alignment) { this->TestAlignment(); }
88+
TYPED_TEST(TestMemoryPool, Reallocate) { this->TestReallocate(); }
7989

80-
REGISTER_TYPED_TEST_SUITE_P(TestMemoryPool, MemoryTracking, OOM, Reallocate, Alignment);
90+
TYPED_TEST(TestMemoryPool, ReallocateNoCopy) { this->TestReallocateNoCopy(); }
8191

82-
INSTANTIATE_TYPED_TEST_SUITE_P(Default, TestMemoryPool, DefaultMemoryPoolFactory);
83-
INSTANTIATE_TYPED_TEST_SUITE_P(System, TestMemoryPool, SystemMemoryPoolFactory);
84-
85-
#ifdef ARROW_JEMALLOC
86-
INSTANTIATE_TYPED_TEST_SUITE_P(Jemalloc, TestMemoryPool, JemallocMemoryPoolFactory);
87-
#endif
88-
89-
#ifdef ARROW_MIMALLOC
90-
INSTANTIATE_TYPED_TEST_SUITE_P(Mimalloc, TestMemoryPool, MimallocMemoryPoolFactory);
91-
#endif
92+
TYPED_TEST(TestMemoryPool, Alignment) { this->TestAlignment(); }
9293

9394
TEST(DefaultMemoryPool, Identity) {
9495
// The default memory pool is pointer-identical to one of the backend-specific pools.

0 commit comments

Comments
 (0)