Skip to content

Commit 635eb22

Browse files
authored
Merge pull request #3 from byrnedj/acquire_sergey
Fix for race in acquire with test case
2 parents e38424c + bada673 commit 635eb22

8 files changed

+225
-14
lines changed

cachelib/CMakeLists.txt

+6
Original file line numberDiff line numberDiff line change
@@ -396,4 +396,10 @@ if (BUILD_TESTS)
396396
RENAME
397397
Makefile
398398
)
399+
install(
400+
FILES
401+
${CACHELIB_HOME}/allocator/tests/ChainedItemParentAcquireAfterMove.gdb
402+
DESTINATION
403+
${TESTS_INSTALL_DIR}
404+
)
399405
endif()

cachelib/allocator/CacheAllocator-inl.h

+6-14
Original file line numberDiff line numberDiff line change
@@ -2639,20 +2639,12 @@ bool CacheAllocator<CacheTrait>::moveForSlabRelease(
26392639
const auto allocInfo = allocator_->getAllocInfo(oldItem.getMemory());
26402640
if (chainedItem) {
26412641
newItemHdl.reset();
2642-
auto ref = parentItem->unmarkMoving();
2643-
if (UNLIKELY(ref == 0)) {
2644-
wakeUpWaiters(*parentItem, {});
2645-
const auto res =
2646-
releaseBackToAllocator(*parentItem, RemoveContext::kNormal, false);
2647-
XDCHECK(res == ReleaseRes::kReleased);
2648-
return true;
2649-
} else {
2650-
XDCHECK_NE(ref, 0);
2651-
auto parentHdl = acquire(parentItem);
2652-
if (parentHdl) {
2653-
wakeUpWaiters(*parentItem, std::move(parentHdl));
2654-
}
2655-
}
2642+
auto ref = parentItem->unmarkMovingAndIncRef();
2643+
XDCHECK_NE(ref, 0);
2644+
auto parentHdl = acquire(parentItem);
2645+
XDCHECK(parentHdl);
2646+
parentHdl->decRef();
2647+
wakeUpWaiters(*parentItem, std::move(parentHdl));
26562648
} else {
26572649
auto ref = unmarkMovingAndWakeUpWaiters(oldItem, std::move(newItemHdl));
26582650
XDCHECK(ref == 0);

cachelib/allocator/CacheItem-inl.h

+5
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,11 @@ RefcountWithFlags::Value CacheItem<CacheTrait>::unmarkMoving() noexcept {
247247
return ref_.unmarkMoving();
248248
}
249249

250+
template <typename CacheTrait>
251+
RefcountWithFlags::Value CacheItem<CacheTrait>::unmarkMovingAndIncRef() noexcept {
252+
return ref_.unmarkMovingAndIncRef();
253+
}
254+
250255
template <typename CacheTrait>
251256
bool CacheItem<CacheTrait>::isMoving() const noexcept {
252257
return ref_.isMoving();

cachelib/allocator/CacheItem.h

+1
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,7 @@ class CACHELIB_PACKED_ATTR CacheItem {
380380
*/
381381
bool markMoving();
382382
RefcountWithFlags::Value unmarkMoving() noexcept;
383+
RefcountWithFlags::Value unmarkMovingAndIncRef() noexcept;
383384
bool isMoving() const noexcept;
384385
bool isOnlyMoving() const noexcept;
385386

cachelib/allocator/Refcount.h

+29
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,35 @@ class FOLLY_PACK_ATTR RefcountWithFlags {
379379

380380
return retValue & kRefMask;
381381
}
382+
383+
/*
384+
* this is used when we immediately call acquire after unmarking
385+
* moving - this is currently done in the case of moving a
386+
* chained item when the parent is unmarked moving and we
387+
* need to wake up the waiters with the parent handle BUT
388+
* we don't want the parent item to be marked moving/exclusive
389+
* between unmarking moving and acquire - so we do not
390+
* modify the refcount (moving state = exclusive bit set and refcount == 1)
391+
*/
392+
Value unmarkMovingAndIncRef() noexcept {
393+
XDCHECK(isMoving());
394+
auto predicate = [](const Value curValue) {
395+
XDCHECK((curValue & kAccessRefMask) != 0);
396+
return true;
397+
};
398+
399+
Value retValue;
400+
auto newValue = [&retValue](const Value curValue) {
401+
retValue =
402+
(curValue) & ~getAdminRef<kExclusive>();
403+
return retValue;
404+
};
405+
406+
auto updated = atomicUpdateValue(predicate, newValue);
407+
XDCHECK(updated);
408+
409+
return retValue & kRefMask;
410+
}
382411

383412
bool isMoving() const noexcept {
384413
auto raw = getRaw();

cachelib/allocator/tests/AllocatorTypeTest.cpp

+4
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,10 @@ TYPED_TEST(BaseAllocatorTest, TransferChainAfterMoving) {
292292
this->testTransferChainAfterMoving();
293293
}
294294

295+
TYPED_TEST(BaseAllocatorTest, ChainedItemParentAcquireAfterMove) {
296+
this->testChainedItemParentAcquireAfterMove();
297+
}
298+
295299
TYPED_TEST(BaseAllocatorTest, AddAndPopChainedItemMultithread) {
296300
this->testAddAndPopChainedItemMultithread();
297301
}

cachelib/allocator/tests/BaseAllocatorTest.h

+152
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,12 @@
1919
#include <folly/Random.h>
2020
#include <folly/Singleton.h>
2121
#include <folly/synchronization/Baton.h>
22+
#include <folly/synchronization/Latch.h>
23+
24+
#include <fcntl.h>
25+
#include <unistd.h>
26+
#include <ctype.h>
27+
#include <semaphore.h>
2228

2329
#include <algorithm>
2430
#include <chrono>
@@ -5189,6 +5195,152 @@ class BaseAllocatorTest : public AllocatorTest<AllocatorT> {
51895195
}
51905196
}
51915197

5198+
// helper function to check if gdb is running and attached to test
5199+
bool isGdbAttached() {
5200+
char buf[4096];
5201+
const int status_fd = open("/proc/self/status", O_RDONLY);
5202+
if (status_fd == -1) {
5203+
return false;
5204+
}
5205+
const ssize_t num_read = read(status_fd, buf, sizeof(buf) - 1);
5206+
close(status_fd);
5207+
5208+
if (num_read <= 0) {
5209+
return false;
5210+
}
5211+
buf[num_read] = '\0';
5212+
constexpr char tracerPidString[] = "TracerPid:";
5213+
const auto tracer_pid_ptr = strstr(buf, tracerPidString);
5214+
if (!tracer_pid_ptr) {
5215+
return false;
5216+
}
5217+
for (const char* characterPtr = tracer_pid_ptr + sizeof(tracerPidString) - 1;
5218+
characterPtr <= buf + num_read; ++characterPtr) {
5219+
if (isspace(*characterPtr)) {
5220+
continue;
5221+
} else {
5222+
return isdigit(*characterPtr) != 0 && *characterPtr != '0';
5223+
}
5224+
}
5225+
return false;
5226+
}
5227+
5228+
void gdb_sync1() { sleep(1); }
5229+
void gdb_sync2() { sleep(1); }
5230+
void gdb_sync3() { sleep(1); }
5231+
void gdb_sync4() { sleep(1); }
5232+
void testChainedItemParentAcquireAfterMove() {
5233+
sem_unlink("/gdb1_sem");
5234+
sem_t *sem = sem_open("/gdb1_sem", O_CREAT | O_EXCL, S_IRUSR | S_IWUSR, 0);
5235+
int ppid = getpid(); //parent pid
5236+
int fpid = fork();
5237+
if (fpid == 0) {
5238+
sem_wait(sem);
5239+
sem_close(sem);
5240+
sem_unlink("/gdb1_sem");
5241+
char cmdpid[256];
5242+
sprintf(cmdpid,"%d",ppid);
5243+
int f = execlp("gdb","gdb","--pid",cmdpid,
5244+
"--batch-silent","--command=ChainedItemParentAcquireAfterMove.gdb",(char*) 0);
5245+
ASSERT(f != -1);
5246+
}
5247+
5248+
// create an allocator worth 4 slabs
5249+
// first slab is for overhead, second is parent class
5250+
// thrid is chained item 1 and fourth is for new chained item alloc
5251+
// to move to.
5252+
typename AllocatorT::Config config;
5253+
config.configureChainedItems();
5254+
5255+
config.setCacheSize(4 * Slab::kSize);
5256+
5257+
using Item = typename AllocatorT::Item;
5258+
5259+
std::atomic<uint64_t> numRemovedKeys{0};
5260+
config.setRemoveCallback(
5261+
[&](const typename AllocatorT::RemoveCbData&) { ++numRemovedKeys; });
5262+
5263+
std::atomic<uint64_t> numMoves{0};
5264+
config.enableMovingOnSlabRelease(
5265+
[&](Item& oldItem, Item& newItem, Item* /* parentPtr */) {
5266+
assert(oldItem.getSize() == newItem.getSize());
5267+
assert(oldItem.isChainedItem());
5268+
std::memcpy(newItem.getMemory(), oldItem.getMemory(),
5269+
oldItem.getSize());
5270+
++numMoves;
5271+
}
5272+
);
5273+
5274+
AllocatorT alloc(config);
5275+
const size_t numBytes = alloc.getCacheMemoryStats().ramCacheSize;
5276+
const auto poolSize = numBytes;
5277+
//items are 100K and 200K
5278+
const std::set<uint32_t> allocSizes = {2*1024*1024 + 1024 , 3*1024*1024};
5279+
const auto pid = alloc.addPool("one", poolSize, allocSizes);
5280+
5281+
// Allocate 1 parent items and for each parent item, 1 chained
5282+
// allocation
5283+
auto allocFn = [&](std::string keyPrefix, std::vector<uint32_t> sizes) {
5284+
for (unsigned int loop = 0; loop < 1; ++loop) {
5285+
std::vector<uint8_t*> bufList;
5286+
std::vector<typename AllocatorT::WriteHandle> parentHandles;
5287+
for (unsigned int i = 0; i < 1; ++i) {
5288+
const auto key = keyPrefix + folly::to<std::string>(loop) + "_" +
5289+
folly::to<std::string>(i);
5290+
5291+
auto itemHandle = util::allocateAccessible(alloc, pid, key, sizes[0]);
5292+
5293+
for (unsigned int j = 0; j < 1; ++j) {
5294+
auto childItem =
5295+
alloc.allocateChainedItem(itemHandle, sizes[1]);
5296+
ASSERT_NE(nullptr, childItem);
5297+
5298+
uint8_t* buf = reinterpret_cast<uint8_t*>(childItem->getMemory());
5299+
// write first 50 bytes here
5300+
for (uint8_t k = 0; k < 50; ++k) {
5301+
buf[k] = k;
5302+
}
5303+
bufList.push_back(buf);
5304+
5305+
alloc.addChainedItem(itemHandle, std::move(childItem));
5306+
}
5307+
}
5308+
}
5309+
};
5310+
auto sizes = std::vector<uint32_t>{2*1024*1024, 3*1024*1024 - 1000};
5311+
allocFn(std::string{"yolo"}, sizes);
5312+
5313+
sem_post(sem);
5314+
while (!isGdbAttached()) {
5315+
std::this_thread::sleep_for(std::chrono::milliseconds(1));
5316+
}
5317+
//need to suspend thread 1 - who is doing the eviction
5318+
//gdb will do this for us
5319+
folly::Latch latch(1);
5320+
std::unique_ptr<std::thread> r = std::make_unique<std::thread>([&](){
5321+
ClassId cid = static_cast<ClassId>(1);
5322+
//gdb will interupt this thread when we hit acquire
5323+
//we will then switch to the other thread and attempt
5324+
//to evict the parent
5325+
gdb_sync1();
5326+
latch.count_down();
5327+
alloc.releaseSlab(pid, cid, SlabReleaseMode::kRebalance);
5328+
gdb_sync3();
5329+
});
5330+
r->detach();
5331+
5332+
latch.wait();
5333+
//this alloc should fail because item can't be marked for eviction
5334+
//and we exceed evict attempts
5335+
auto itemHandle = util::allocateAccessible(alloc, pid, std::to_string(10), sizes[0]);
5336+
EXPECT_EQ(itemHandle, nullptr);
5337+
//complete the move
5338+
gdb_sync2();
5339+
auto itemHandle2 = util::allocateAccessible(alloc, pid, std::to_string(11), sizes[0]);
5340+
EXPECT_NE(itemHandle2, nullptr);
5341+
gdb_sync4();
5342+
5343+
}
51925344
// Test stats on counting chained items
51935345
// 1. Alloc an item and several chained items
51945346
// * Before inserting chained items, make sure count is zero
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
break gdb_sync1
2+
c
3+
set scheduler-locking on
4+
break acquire
5+
c
6+
c
7+
c
8+
c
9+
thread 1
10+
set scheduler-locking on
11+
del 2
12+
break gdb_sync2
13+
c
14+
thread 4
15+
set scheduler-locking on
16+
break gdb_sync3
17+
c
18+
thread 1
19+
set scheduler-locking off
20+
break gdb_sync4
21+
c
22+
c

0 commit comments

Comments
 (0)