diff --git a/cachelib/allocator/CacheAllocator-inl.h b/cachelib/allocator/CacheAllocator-inl.h
index d14bcfa789..e172666cf6 100644
--- a/cachelib/allocator/CacheAllocator-inl.h
+++ b/cachelib/allocator/CacheAllocator-inl.h
@@ -823,20 +823,23 @@ CacheAllocator<CacheTrait>::releaseBackToAllocator(Item& it,
       removeFromMMContainer(*head);
 
-      // If this chained item is marked as exclusive, we will not free it.
-      // We must capture the exclusive state before we do the decRef when
-      // we know the item must still be valid
-      const bool wasExclusive = head->isExclusive();
+      // If this chained item is marked as moving, we will not free it.
+      // We must capture the moving state before we do the decRef when
+      // we know the item must still be valid. The item cannot be marked as
+      // exclusive: only the parent can be marked as such, and even the
+      // parent needs to be unmarked prior to calling releaseBackToAllocator.
+      const bool wasMoving = head->isMoving();
+      XDCHECK(!head->isMarkedForEviction());
 
       // Decref and check if we were the last reference. Now if the item
-      // was marked exclusive, after decRef, it will be free to be released
+      // was marked moving, after decRef, it will be free to be released
       // by slab release thread
       const auto childRef = head->decRef();
 
-      // If the item is already exclusive and we already decremented the
+      // If the item is already moving and we already decremented the
       // refcount, we don't need to free this item. We'll let the slab
       // release thread take care of that
-      if (!wasExclusive) {
+      if (!wasMoving) {
        if (childRef != 0) {
          throw std::runtime_error(folly::sformat(
              "chained item refcount is not zero. We cannot proceed! "
@@ -844,7 +847,7 @@ CacheAllocator<CacheTrait>::releaseBackToAllocator(Item& it,
              childRef, head->toString()));
        }
 
-        // Item is not exclusive and refcount is 0, we can proceed to
+        // Item is not moving and refcount is 0, we can proceed to
        // free it or recylce the memory
        if (head == toRecycle) {
          XDCHECK(ReleaseRes::kReleased != res);
@@ -872,9 +875,12 @@ CacheAllocator<CacheTrait>::releaseBackToAllocator(Item& it,
 }
 
 template <typename CacheTrait>
-void CacheAllocator<CacheTrait>::incRef(Item& it) {
-  it.incRef();
-  ++handleCount_.tlStats();
+bool CacheAllocator<CacheTrait>::incRef(Item& it) {
+  if (it.incRef()) {
+    ++handleCount_.tlStats();
+    return true;
+  }
+  return false;
 }
 
 template <typename CacheTrait>
@@ -894,8 +900,12 @@ CacheAllocator<CacheTrait>::acquire(Item* it) {
 
   SCOPE_FAIL { stats_.numRefcountOverflow.inc(); };
 
-  incRef(*it);
-  return WriteHandle{it, *this};
+  if (LIKELY(incRef(*it))) {
+    return WriteHandle{it, *this};
+  } else {
+    // item is being evicted
+    return WriteHandle{};
+  }
 }
 
 template <typename CacheTrait>
@@ -1121,7 +1131,7 @@ bool CacheAllocator<CacheTrait>::moveRegularItem(Item& oldItem,
   // it is unsafe to replace the old item with a new one, so we should
   // also abort.
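// Aside - a minimal sketch of the predicate used by the replaceIf() call
// below. It matches the itemSlabMovePredicate definition this patch adds in
// CacheAllocator.h; the replace may only succeed while the old item is still
// marked moving and no reader holds a reference to it:
//
//   static bool itemSlabMovePredicate(const Item& item) {
//     return item.isMoving() && item.getRefCount() == 0;
//   }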
if (!accessContainer_->replaceIf(oldItem, *newItemHdl, - itemExclusivePredicate)) { + itemSlabMovePredicate)) { return false; } @@ -1170,18 +1180,18 @@ bool CacheAllocator::moveChainedItem(ChainedItem& oldItem, // This item has been unlinked from its parent and we're the only // owner of it, so we're done here - if (!oldItem.isInMMContainer() || oldItem.isOnlyExclusive()) { + if (!oldItem.isInMMContainer() || oldItem.isOnlyMoving()) { return false; } - const auto parentKey = oldItem.getParentItem(compressor_).getKey(); - - // Grab lock to prevent anyone else from modifying the chain + auto& expectedParent = oldItem.getParentItem(compressor_); + const auto parentKey = expectedParent.getKey(); auto l = chainedItemLocks_.lockExclusive(parentKey); + // verify old item under the lock auto parentHandle = validateAndGetParentHandleForChainedMoveLocked(oldItem, parentKey); - if (!parentHandle) { + if (!parentHandle || &expectedParent != parentHandle.get()) { return false; } @@ -1201,7 +1211,7 @@ bool CacheAllocator::moveChainedItem(ChainedItem& oldItem, // In case someone else had removed this chained item from its parent by now // So we check again to see if the it has been unlinked from its parent - if (!oldItem.isInMMContainer() || oldItem.isOnlyExclusive()) { + if (!oldItem.isInMMContainer() || oldItem.isOnlyMoving()) { return false; } @@ -1217,12 +1227,34 @@ bool CacheAllocator::moveChainedItem(ChainedItem& oldItem, // parent's chain and the MMContainer. auto oldItemHandle = replaceChainedItemLocked(oldItem, std::move(newItemHdl), *parentHandle); - XDCHECK(oldItemHandle->isExclusive()); + XDCHECK(oldItemHandle->isMoving()); XDCHECK(!oldItemHandle->isInMMContainer()); return true; } +template +typename CacheAllocator::NvmCacheT::PutToken +CacheAllocator::createPutToken(Item& item) { + const bool evictToNvmCache = shouldWriteToNvmCache(item); + return evictToNvmCache ? nvmCache_->createPutToken(item.getKey()) + : typename NvmCacheT::PutToken{}; +} + +template +void CacheAllocator::unlinkItemForEviction(Item& it) { + XDCHECK(it.isMarkedForEviction()); + XDCHECK(it.getRefCount() == 0); + + accessContainer_->remove(it); + removeFromMMContainer(it); + + // Since we managed to mark the item for eviction we must be the only + // owner of the item. + const auto ref = it.unmarkForEviction(); + XDCHECK(ref == 0u); +} + template typename CacheAllocator::Item* CacheAllocator::findEviction(PoolId pid, ClassId cid) { @@ -1231,76 +1263,102 @@ CacheAllocator::findEviction(PoolId pid, ClassId cid) { // Keep searching for a candidate until we were able to evict it // or until the search limit has been exhausted unsigned int searchTries = 0; - auto itr = mmContainer.getEvictionIterator(); while ((config_.evictionSearchTries == 0 || - config_.evictionSearchTries > searchTries) && - itr) { - ++searchTries; - (*stats_.evictionAttempts)[pid][cid].inc(); + config_.evictionSearchTries > searchTries)) { + Item* toRecycle = nullptr; + Item* candidate = nullptr; + typename NvmCacheT::PutToken token; + + mmContainer.withEvictionIterator([this, pid, cid, &candidate, &toRecycle, + &searchTries, &mmContainer, + &token](auto&& itr) { + if (!itr) { + ++searchTries; + (*stats_.evictionAttempts)[pid][cid].inc(); + return; + } - Item* toRecycle = itr.get(); + while ((config_.evictionSearchTries == 0 || + config_.evictionSearchTries > searchTries) && + itr) { + ++searchTries; + (*stats_.evictionAttempts)[pid][cid].inc(); + + auto* toRecycle_ = itr.get(); + auto* candidate_ = + toRecycle_->isChainedItem() + ? 
&toRecycle_->asChainedItem().getParentItem(compressor_)
+                : toRecycle_;
+
+        token = createPutToken(*candidate_);
+
+        if (shouldWriteToNvmCache(*candidate_) && !token.isValid()) {
+          stats_.evictFailConcurrentFill.inc();
+        } else if (candidate_->markForEviction()) {
+          XDCHECK(candidate_->isMarkedForEviction());
+          // markForEviction to make sure no other thread is evicting the item
+          // nor holding a handle to that item
+
+          toRecycle = toRecycle_;
+          candidate = candidate_;
+
+          // Check if parent changed for chained items - if yes, we cannot
+          // remove the child from the mmContainer as we will not be evicting
+          // it. We could abort right here, but we need to cleanup in case
+          // unmarkForEviction() returns 0 - so just go through normal path.
+          if (!toRecycle_->isChainedItem() ||
+              &toRecycle->asChainedItem().getParentItem(compressor_) ==
+                  candidate)
+            mmContainer.remove(itr);
+          return;
+        }
 
-    Item* candidate =
-        toRecycle->isChainedItem()
-            ? &toRecycle->asChainedItem().getParentItem(compressor_)
-            : toRecycle;
+        if (candidate_->hasChainedItem()) {
+          stats_.evictFailParentAC.inc();
+        } else {
+          stats_.evictFailAC.inc();
+        }
 
-    // make sure no other thead is evicting the item
-    if (candidate->getRefCount() != 0 || !candidate->markExclusive()) {
-      ++itr;
+        ++itr;
+        XDCHECK(toRecycle == nullptr);
+        XDCHECK(candidate == nullptr);
+      }
+    });
+
+    if (!toRecycle)
       continue;
-    }
+
+    XDCHECK(toRecycle);
+    XDCHECK(candidate);
 
     // for chained items, the ownership of the parent can change. We try to
     // evict what we think as parent and see if the eviction of parent
     // recycles the child we intend to.
-    bool evictionSuccessful = false;
-    {
-      auto toReleaseHandle =
-          itr->isChainedItem()
-              ? advanceIteratorAndTryEvictChainedItem(itr)
-              : advanceIteratorAndTryEvictRegularItem(mmContainer, itr);
-      evictionSuccessful = toReleaseHandle != nullptr;
-      // destroy toReleseHandle. The item won't be released to allocator
-      // since we marked it as exclusive.
-    }
-
-    const auto ref = candidate->unmarkExclusive();
-    if (ref == 0u) {
-      // Invalidate iterator since later on we may use this mmContainer
-      // again, which cannot be done unless we drop this iterator
-      itr.destroy();
-
-      // recycle the item. it's safe to do so, even if toReleaseHandle was
-      // NULL. If `ref` == 0 then it means that we are the last holder of
-      // that item.
-      if (candidate->hasChainedItem()) {
-        (*stats_.chainedItemEvictions)[pid][cid].inc();
-      } else {
-        (*stats_.regularItemEvictions)[pid][cid].inc();
-      }
+    unlinkItemForEviction(*candidate);
+    XDCHECK(!candidate->isMarkedForEviction() && !candidate->isMoving());
 
-      if (auto eventTracker = getEventTracker()) {
-        eventTracker->record(AllocatorApiEvent::DRAM_EVICT, candidate->getKey(),
-                             AllocatorApiResult::EVICTED, candidate->getSize(),
-                             candidate->getConfiguredTTL().count());
-      }
+    if (token.isValid() && shouldWriteToNvmCacheExclusive(*candidate)) {
+      nvmCache_->put(*candidate, std::move(token));
+    }
 
-    // check if by releasing the item we intend to, we actually
-    // recycle the candidate.
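// Aside - the restructured findEviction() above reduces to this flow
// (simplified sketch; error paths and the chained-item parent checks are
// omitted, and pickFromLruTail is a hypothetical stand-in for the iterator
// walk inside withEvictionIterator):
//
//   Item* candidate = pickFromLruTail(mmContainer);
//   auto token = createPutToken(*candidate);
//   if (candidate->markForEviction()) {   // wins against readers/evictors
//     mmContainer.remove(itr);            // still under the container lock
//     unlinkItemForEviction(*candidate);  // AC + MM unlink, refcount -> 0
//     if (token.isValid() && shouldWriteToNvmCacheExclusive(*candidate)) {
//       nvmCache_->put(*candidate, std::move(token));
//     }
//     releaseBackToAllocator(*candidate, RemoveContext::kEviction,
//                            /* isNascent */ false, toRecycle);
//   }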
- if (ReleaseRes::kRecycled == - releaseBackToAllocator(*candidate, RemoveContext::kEviction, - /* isNascent */ false, toRecycle)) { - return toRecycle; - } + if (candidate->hasChainedItem()) { + (*stats_.chainedItemEvictions)[pid][cid].inc(); } else { - XDCHECK(!evictionSuccessful); + (*stats_.regularItemEvictions)[pid][cid].inc(); + } + + if (auto eventTracker = getEventTracker()) { + eventTracker->record(AllocatorApiEvent::DRAM_EVICT, candidate->getKey(), + AllocatorApiResult::EVICTED, candidate->getSize(), + candidate->getConfiguredTTL().count()); } - // If we destroyed the itr to possibly evict and failed, we restart - // from the beginning again - if (!itr) { - itr.resetToBegin(); + // check if by releasing the item we intend to, we actually + // recycle the candidate. + auto ret = releaseBackToAllocator(*candidate, RemoveContext::kEviction, + /* isNascent */ false, toRecycle); + if (ret == ReleaseRes::kRecycled) { + return toRecycle; } } return nullptr; @@ -1444,7 +1502,7 @@ bool CacheAllocator::pushToNvmCacheFromRamForTesting( if (handle && nvmCache_ && shouldWriteToNvmCache(*handle) && shouldWriteToNvmCacheExclusive(*handle)) { - nvmCache_->put(handle, nvmCache_->createPutToken(handle->getKey())); + nvmCache_->put(*handle, nvmCache_->createPutToken(handle->getKey())); return true; } return false; @@ -1834,13 +1892,13 @@ std::vector CacheAllocator::dumpEvictionIterator( std::vector content; auto& mm = *mmContainers_[pid][cid]; - auto evictItr = mm.getEvictionIterator(); - size_t i = 0; - while (evictItr && i < numItems) { - content.push_back(evictItr->toString()); - ++evictItr; - ++i; - } + + mm.withEvictionIterator([&content, numItems](auto&& itr) { + while (itr && content.size() < numItems) { + content.push_back(itr->toString()); + ++itr; + } + }); return content; } @@ -2349,10 +2407,11 @@ void CacheAllocator::releaseSlabImpl( // 3. If 2 is successful, Move or Evict // 4. Move on to the next item if current item is freed for (auto alloc : releaseContext.getActiveAllocations()) { + auto startTimeSec = util::getCurrentTimeSec(); // Need to mark an item for release before proceeding // If we can't mark as moving, it means the item is already freed const bool isAlreadyFreed = - !markExclusiveForSlabRelease(releaseContext, alloc, throttler); + !markMovingForSlabRelease(releaseContext, alloc, throttler); if (isAlreadyFreed) { continue; } @@ -2389,7 +2448,7 @@ bool CacheAllocator::moveForSlabRelease( bool isMoved = false; auto startTime = util::getCurrentTimeSec(); - WriteHandle newItemHdl = allocateNewItemForOldItem(oldItem); + WriteHandle newItemHdl{}; for (unsigned int itemMovingAttempts = 0; itemMovingAttempts < config_.movingTries; @@ -2397,16 +2456,23 @@ bool CacheAllocator::moveForSlabRelease( stats_.numMoveAttempts.inc(); // Nothing to move and the key is likely also bogus for chained items. - if (oldItem.isOnlyExclusive()) { - oldItem.unmarkExclusive(); + if (oldItem.isOnlyMoving()) { + oldItem.unmarkMoving(); const auto res = releaseBackToAllocator(oldItem, RemoveContext::kNormal, false); XDCHECK(res == ReleaseRes::kReleased); return true; } + throttleWith(throttler, [&] { + XLOGF(WARN, + "Spent {} seconds, slab release still trying to move Item: {}. 
" + "Pool: {}, Class: {}.", + util::getCurrentTimeSec() - startTime, oldItem.toString(), + ctx.getPoolId(), ctx.getClassId()); + }); + if (!newItemHdl) { - // try to allocate again if it previously wasn't successful newItemHdl = allocateNewItemForOldItem(oldItem); } @@ -2417,14 +2483,6 @@ bool CacheAllocator::moveForSlabRelease( break; } } - - throttleWith(throttler, [&] { - XLOGF(WARN, - "Spent {} seconds, slab release still trying to move Item: {}. " - "Pool: {}, Class: {}.", - util::getCurrentTimeSec() - startTime, oldItem.toString(), - ctx.getPoolId(), ctx.getClassId()); - }); } // Return false if we've exhausted moving tries. @@ -2437,7 +2495,7 @@ bool CacheAllocator::moveForSlabRelease( // that's identical to this one to replace it. Here we just need to wait // until all users have dropped the item handles before we can proceed. startTime = util::getCurrentTimeSec(); - while (!oldItem.isOnlyExclusive()) { + while (!oldItem.isOnlyMoving()) { throttleWith(throttler, [&] { XLOGF(WARN, "Spent {} seconds, slab release still waiting for refcount to " @@ -2446,6 +2504,8 @@ bool CacheAllocator::moveForSlabRelease( ctx.getPoolId(), ctx.getClassId()); }); } + auto ref = oldItem.unmarkMoving(); + XDCHECK(ref, 0); const auto allocInfo = allocator_->getAllocInfo(oldItem.getMemory()); allocator_->free(&oldItem); @@ -2456,10 +2516,10 @@ bool CacheAllocator::moveForSlabRelease( } template -typename CacheAllocator::ReadHandle +typename CacheAllocator::WriteHandle CacheAllocator::validateAndGetParentHandleForChainedMoveLocked( const ChainedItem& item, const Key& parentKey) { - ReadHandle parentHandle{}; + WriteHandle parentHandle{}; try { parentHandle = findInternal(parentKey); // If the parent is not the same as the parent of the chained item, @@ -2478,6 +2538,7 @@ CacheAllocator::validateAndGetParentHandleForChainedMoveLocked( template typename CacheAllocator::WriteHandle CacheAllocator::allocateNewItemForOldItem(const Item& oldItem) { + XDCHECK(oldItem.isMoving()); if (oldItem.isChainedItem()) { const auto& oldChainedItem = oldItem.asChainedItem(); const auto parentKey = oldChainedItem.getParentItem(compressor_).getKey(); @@ -2491,8 +2552,8 @@ CacheAllocator::allocateNewItemForOldItem(const Item& oldItem) { return {}; } - // Set up the destination for the move. Since oldChainedItem would have - // the exclusive bit set, it won't be picked for eviction. + // Set up the destination for the move. Since oldChainedItem would + // be marked as moving, it won't be picked for eviction. auto newItemHdl = allocateChainedItemInternal(parentHandle, oldChainedItem.getSize()); if (!newItemHdl) { @@ -2503,7 +2564,7 @@ CacheAllocator::allocateNewItemForOldItem(const Item& oldItem) { auto parentPtr = parentHandle.getInternal(); XDCHECK_EQ(reinterpret_cast(parentPtr), reinterpret_cast( - &oldChainedItem.getParentItem(compressor_))); + &newItemHdl->asChainedItem().getParentItem(compressor_))); return newItemHdl; } @@ -2544,7 +2605,7 @@ bool CacheAllocator::tryMovingForSlabRelease( // item is still valid. const std::string parentKey = oldItem.asChainedItem().getParentItem(compressor_).getKey().str(); - if (oldItem.isOnlyExclusive()) { + if (oldItem.isOnlyMoving()) { // If chained item no longer has a refcount, its parent is already // being released, so we abort this try to moving. 
return false; @@ -2571,56 +2632,11 @@ void CacheAllocator::evictForSlabRelease( const SlabReleaseContext& ctx, Item& item, util::Throttler& throttler) { auto startTime = util::getCurrentTimeSec(); while (true) { + XDCHECK(item.isMoving()); stats_.numEvictionAttempts.inc(); - // if the item is already in a state where only the exclusive bit is set, - // nothing needs to be done. We simply need to unmark exclusive bit and free - // the item. - if (item.isOnlyExclusive()) { - item.unmarkExclusive(); - const auto res = - releaseBackToAllocator(item, RemoveContext::kNormal, false); - XDCHECK(ReleaseRes::kReleased == res); - return; - } - - // Since we couldn't move, we now evict this item. Owning handle will be - // the item's handle for regular/normal items and will be the parent - // handle for chained items. - auto owningHandle = - item.isChainedItem() - ? evictChainedItemForSlabRelease(item.asChainedItem()) - : evictNormalItemForSlabRelease(item); - - // we managed to evict the corresponding owner of the item and have the - // last handle for the owner. - if (owningHandle) { - const auto allocInfo = - allocator_->getAllocInfo(static_cast(&item)); - if (owningHandle->hasChainedItem()) { - (*stats_.chainedItemEvictions)[allocInfo.poolId][allocInfo.classId] - .inc(); - } else { - (*stats_.regularItemEvictions)[allocInfo.poolId][allocInfo.classId] - .inc(); - } - - stats_.numEvictionSuccesses.inc(); - - // we have the last handle. no longer need to hold on to the exclusive bit - item.unmarkExclusive(); - - // manually decrement the refcount to call releaseBackToAllocator - const auto ref = decRef(*owningHandle); - XDCHECK(ref == 0); - const auto res = releaseBackToAllocator(*owningHandle.release(), - RemoveContext::kEviction, false); - XDCHECK(res == ReleaseRes::kReleased); - return; - } - if (shutDownInProgress_) { - item.unmarkExclusive(); + item.unmarkMoving(); allocator_->abortSlabRelease(ctx); throw exception::SlabReleaseAborted( folly::sformat("Slab Release aborted while trying to evict" @@ -2640,266 +2656,102 @@ void CacheAllocator::evictForSlabRelease( .toString()) : ""); }); - } -} - -template -typename CacheAllocator::WriteHandle -CacheAllocator::advanceIteratorAndTryEvictRegularItem( - MMContainer& mmContainer, EvictionIterator& itr) { - // we should flush this to nvmcache if it is not already present in nvmcache - // and the item is not expired. - Item& item = *itr; - const bool evictToNvmCache = shouldWriteToNvmCache(item); - - auto token = evictToNvmCache ? nvmCache_->createPutToken(item.getKey()) - : typename NvmCacheT::PutToken{}; - - // record the in-flight eviciton. If not, we move on to next item to avoid - // stalling eviction. - if (evictToNvmCache && !token.isValid()) { - ++itr; - stats_.evictFailConcurrentFill.inc(); - return WriteHandle{}; - } - - // If there are other accessors, we should abort. 
Acquire a handle here since - // if we remove the item from both access containers and mm containers - // below, we will need a handle to ensure proper cleanup in case we end up - // not evicting this item - auto evictHandle = accessContainer_->removeIf(item, &itemExclusivePredicate); - if (!evictHandle) { - ++itr; - stats_.evictFailAC.inc(); - return evictHandle; - } - - mmContainer.remove(itr); - XDCHECK_EQ(reinterpret_cast(evictHandle.get()), - reinterpret_cast(&item)); - XDCHECK(!evictHandle->isInMMContainer()); - XDCHECK(!evictHandle->isAccessible()); - - // Invalidate iterator since later on if we are not evicting this - // item, we may need to rely on the handle we created above to ensure - // proper cleanup if the item's raw refcount has dropped to 0. - // And since this item may be a parent item that has some child items - // in this very same mmContainer, we need to make sure we drop this - // exclusive iterator so we can gain access to it when we're cleaning - // up the child items - itr.destroy(); - - // Ensure that there are no accessors after removing from the access - // container - XDCHECK(evictHandle->getRefCount() == 1); - - if (evictToNvmCache && shouldWriteToNvmCacheExclusive(item)) { - XDCHECK(token.isValid()); - nvmCache_->put(evictHandle, std::move(token)); - } - return evictHandle; -} - -template -typename CacheAllocator::WriteHandle -CacheAllocator::advanceIteratorAndTryEvictChainedItem( - EvictionIterator& itr) { - XDCHECK(itr->isChainedItem()); - - ChainedItem* candidate = &itr->asChainedItem(); - ++itr; - - // The parent could change at any point through transferChain. However, if - // that happens, we would realize that the releaseBackToAllocator return - // kNotRecycled and we would try another chained item, leading to transient - // failure. - auto& parent = candidate->getParentItem(compressor_); - - const bool evictToNvmCache = shouldWriteToNvmCache(parent); - - auto token = evictToNvmCache ? nvmCache_->createPutToken(parent.getKey()) - : typename NvmCacheT::PutToken{}; - - // if token is invalid, return. iterator is already advanced. - if (evictToNvmCache && !token.isValid()) { - stats_.evictFailConcurrentFill.inc(); - return WriteHandle{}; - } - - // check if the parent exists in the hashtable and refcount is drained. - auto parentHandle = - accessContainer_->removeIf(parent, &itemExclusivePredicate); - if (!parentHandle) { - stats_.evictFailParentAC.inc(); - return parentHandle; - } - - // Invalidate iterator since later on we may use the mmContainer - // associated with this iterator which cannot be done unless we - // drop this iterator - // - // This must be done once we know the parent is not nullptr. - // Since we can very well be the last holder of this parent item, - // which may have a chained item that is linked in this MM container. - itr.destroy(); - - // Ensure we have the correct parent and we're the only user of the - // parent, then free it from access container. 
Otherwise, we abort - XDCHECK_EQ(reinterpret_cast(&parent), - reinterpret_cast(parentHandle.get())); - XDCHECK_EQ(1u, parent.getRefCount()); - - removeFromMMContainer(*parentHandle); - - XDCHECK(!parent.isInMMContainer()); - XDCHECK(!parent.isAccessible()); - - if (evictToNvmCache && shouldWriteToNvmCacheExclusive(*parentHandle)) { - XDCHECK(token.isValid()); - XDCHECK(parentHandle->hasChainedItem()); - nvmCache_->put(parentHandle, std::move(token)); - } - - return parentHandle; -} - -template -typename CacheAllocator::WriteHandle -CacheAllocator::evictNormalItemForSlabRelease(Item& item) { - XDCHECK(item.isExclusive()); - - if (item.isOnlyExclusive()) { - return WriteHandle{}; - } - - auto predicate = [](const Item& it) { return it.getRefCount() == 0; }; - - const bool evictToNvmCache = shouldWriteToNvmCache(item); - auto token = evictToNvmCache ? nvmCache_->createPutToken(item.getKey()) - : typename NvmCacheT::PutToken{}; - - // We remove the item from both access and mm containers. It doesn't matter - // if someone else calls remove on the item at this moment, the item cannot - // be freed as long as we have the exclusive bit set. - auto handle = accessContainer_->removeIf(item, std::move(predicate)); - if (!handle) { - return handle; - } + // if the item is already in a state where only the exclusive bit is set, + // nothing needs to be done. We simply need to call unmarkMoving and free + // the item. + if (item.isOnlyMoving()) { + item.unmarkMoving(); + const auto res = + releaseBackToAllocator(item, RemoveContext::kNormal, false); + XDCHECK(ReleaseRes::kReleased == res); + return; + } - XDCHECK_EQ(reinterpret_cast(handle.get()), - reinterpret_cast(&item)); - XDCHECK_EQ(1u, handle->getRefCount()); - removeFromMMContainer(item); + typename NvmCacheT::PutToken token; + Item* evicted; + if (item.isChainedItem()) { + auto& expectedParent = item.asChainedItem().getParentItem(compressor_); + const std::string parentKey = expectedParent.getKey().str(); + auto l = chainedItemLocks_.lockExclusive(parentKey); + + // check if the child is still in mmContainer and the expected parent is + // valid under the chained item lock. + if (expectedParent.getKey() != parentKey || !item.isInMMContainer() || + item.isOnlyMoving() || + &expectedParent != &item.asChainedItem().getParentItem(compressor_) || + !expectedParent.isAccessible() || !expectedParent.hasChainedItem()) { + continue; + } - // now that we are the only handle and we actually removed something from - // the RAM cache, we enqueue it to nvmcache. - if (evictToNvmCache && shouldWriteToNvmCacheExclusive(item)) { - nvmCache_->put(handle, std::move(token)); - } + // search if the child is present in the chain + { + auto parentHandle = findInternal(parentKey); + if (!parentHandle || parentHandle != &expectedParent) { + continue; + } - return handle; -} + ChainedItem* head = nullptr; + { // scope for the handle + auto headHandle = findChainedItem(expectedParent); + head = headHandle ? &headHandle->asChainedItem() : nullptr; + } -template -typename CacheAllocator::WriteHandle -CacheAllocator::evictChainedItemForSlabRelease(ChainedItem& child) { - XDCHECK(child.isExclusive()); - - // We have the child marked as moving, but dont know anything about the - // state of the parent. Unlike the case of regular eviction where we are - // sure that the child is inside the MMContainer, ensuring its parent is - // valid, we can not make any assumptions here. 
We try to find the parent - // first through the access container and then verify that the parent's - // chain points to the child before cleaning up the parent. If the parent - // was in the process of being re-allocated or child was being removed - // concurrently, we would synchronize here on one of the checks. - Item& expectedParent = child.getParentItem(compressor_); - - // Grab exclusive lock since we are modifying the chain. at this point, we - // dont know the state of the parent. so we need to do some validity checks - // after we have the chained item lock to ensure that we got the lock off of - // a valid state. - const std::string parentKey = expectedParent.getKey().str(); - auto l = chainedItemLocks_.lockExclusive(parentKey); + bool found = false; + while (head) { + if (head == &item) { + found = true; + break; + } + head = head->getNext(compressor_); + } - // check if the child is still in mmContainer and the expected parent is - // valid under the chained item lock. - if (expectedParent.getKey() != parentKey || !child.isInMMContainer() || - child.isOnlyExclusive() || - &expectedParent != &child.getParentItem(compressor_) || - !expectedParent.isAccessible() || !expectedParent.hasChainedItem()) { - return {}; - } + if (!found) { + continue; + } + } - // search if the child is present in the chain - auto parentHandle = findInternal(parentKey); - if (!parentHandle || parentHandle != &expectedParent) { - return {}; - } + evicted = &expectedParent; - ChainedItem* head = nullptr; - { // scope for the handle - auto headHandle = findChainedItem(expectedParent); - head = headHandle ? &headHandle->asChainedItem() : nullptr; - } + token = createPutToken(*evicted); + if (evicted->markForEviction()) { + // unmark the child so it will be freed + item.unmarkMoving(); + unlinkItemForEviction(*evicted); + } else { + continue; + } + } else { + evicted = &item; - bool found = false; - while (head) { - if (head == &child) { - found = true; - break; + token = createPutToken(*evicted); + if (evicted->markForEvictionWhenMoving()) { + unlinkItemForEviction(*evicted); + } else { + continue; + } } - head = head->getNext(compressor_); - } - - if (!found) { - return {}; - } - - // if we found the child in the parent's chain, we remove it and ensure that - // the handle we obtained was the last one. Before that, create a put token - // to guard any racing cache find to avoid item re-appearing in NvmCache. - const bool evictToNvmCache = shouldWriteToNvmCache(expectedParent); - - auto token = evictToNvmCache - ? nvmCache_->createPutToken(expectedParent.getKey()) - : typename NvmCacheT::PutToken{}; - if (!accessContainer_->removeIf(expectedParent, - parentEvictForSlabReleasePredicate)) { - return {}; - } - - // at this point, we should be the last handle owner - XDCHECK_EQ(1u, parentHandle->getRefCount()); - - // We remove the parent from both access and mm containers. 
It doesn't - // matter if someone else calls remove on the parent at this moment, it - // cannot be freed since we hold an active item handle - removeFromMMContainer(*parentHandle); + if (token.isValid() && shouldWriteToNvmCacheExclusive(*evicted)) { + nvmCache_->put(*evicted, std::move(token)); + } - // In case someone else had removed this chained item from its parent by now - // So we check again to see if it has been unlinked from its parent - if (!child.isInMMContainer() || child.isOnlyExclusive()) { - return {}; - } + const auto allocInfo = + allocator_->getAllocInfo(static_cast(evicted)); + if (evicted->hasChainedItem()) { + (*stats_.chainedItemEvictions)[allocInfo.poolId][allocInfo.classId].inc(); + } else { + (*stats_.regularItemEvictions)[allocInfo.poolId][allocInfo.classId].inc(); + } - // check after removing from the MMContainer that the parent is still not - // being marked as moving. If parent is moving, it will release the child - // item and we will wait for that. - if (parentHandle->isExclusive()) { - return {}; - } + stats_.numEvictionSuccesses.inc(); - // now that we are the only handle and we actually removed something from - // the RAM cache, we enqueue it to nvmcache. - if (evictToNvmCache && shouldWriteToNvmCacheExclusive(*parentHandle)) { - DCHECK(parentHandle->hasChainedItem()); - nvmCache_->put(parentHandle, std::move(token)); + XDCHECK(evicted->getRefCount() == 0); + const auto res = + releaseBackToAllocator(*evicted, RemoveContext::kEviction, false); + XDCHECK(res == ReleaseRes::kReleased); + return; } - - return parentHandle; } template @@ -2921,7 +2773,7 @@ bool CacheAllocator::removeIfExpired(const ReadHandle& handle) { } template -bool CacheAllocator::markExclusiveForSlabRelease( +bool CacheAllocator::markMovingForSlabRelease( const SlabReleaseContext& ctx, void* alloc, util::Throttler& throttler) { // MemoryAllocator::processAllocForRelease will execute the callback // if the item is not already free. So there are three outcomes here: @@ -2940,7 +2792,7 @@ bool CacheAllocator::markExclusiveForSlabRelease( // Since this callback is executed, the item is not yet freed itemFreed = false; Item* item = static_cast(memory); - if (item->markExclusive()) { + if (item->markMoving()) { markedMoving = true; } }; diff --git a/cachelib/allocator/CacheAllocator.h b/cachelib/allocator/CacheAllocator.h index 612f6d2185..9540a57d92 100644 --- a/cachelib/allocator/CacheAllocator.h +++ b/cachelib/allocator/CacheAllocator.h @@ -1308,7 +1308,7 @@ class CacheAllocator : public CacheBase { private: // wrapper around Item's refcount and active handle tracking - FOLLY_ALWAYS_INLINE void incRef(Item& it); + FOLLY_ALWAYS_INLINE bool incRef(Item& it); FOLLY_ALWAYS_INLINE RefcountWithFlags::Value decRef(Item& it); // drops the refcount and if needed, frees the allocation back to the memory @@ -1359,6 +1359,12 @@ class CacheAllocator : public CacheBase { bool nascent = false, const Item* toRecycle = nullptr); + // Must be called by the thread which called markForEviction and + // succeeded. After this call, the item is unlinked from Access and + // MM Containers. The item is no longer marked as exclusive and it's + // ref count is 0 - it's available for recycling. + void unlinkItemForEviction(Item& it); + // acquires an handle on the item. returns an empty handle if it is null. 
// @param it pointer to an item // @return WriteHandle return a handle to this item @@ -1448,17 +1454,17 @@ class CacheAllocator : public CacheBase { // @return handle to the parent item if the validations pass // otherwise, an empty Handle is returned. // - ReadHandle validateAndGetParentHandleForChainedMoveLocked( + WriteHandle validateAndGetParentHandleForChainedMoveLocked( const ChainedItem& item, const Key& parentKey); // Given an existing item, allocate a new one for the // existing one to later be moved into. // - // @param oldItem the item we want to allocate a new item for + // @param item reference to the item we want to allocate a new item for // // @return handle to the newly allocated item // - WriteHandle allocateNewItemForOldItem(const Item& oldItem); + WriteHandle allocateNewItemForOldItem(const Item& item); // internal helper that grabs a refcounted handle to the item. This does // not record the access to reflect in the mmContainer. @@ -1512,7 +1518,7 @@ class CacheAllocator : public CacheBase { // callback is responsible for copying the contents and fixing the semantics // of chained item. // - // @param oldItem Reference to the item being moved + // @param oldItem item being moved // @param newItemHdl Reference to the handle of the new item being moved into // // @return true If the move was completed, and the containers were updated @@ -1660,26 +1666,7 @@ class CacheAllocator : public CacheBase { // @return An evicted item or nullptr if there is no suitable candidate. Item* findEviction(PoolId pid, ClassId cid); - using EvictionIterator = typename MMContainer::Iterator; - - // Advance the current iterator and try to evict a regular item - // - // @param mmContainer the container to look for evictions. - // @param itr iterator holding the item - // - // @return valid handle to regular item on success. This will be the last - // handle to the item. On failure an empty handle. - WriteHandle advanceIteratorAndTryEvictRegularItem(MMContainer& mmContainer, - EvictionIterator& itr); - - // Advance the current iterator and try to evict a chained item - // Iterator may also be reset during the course of this function - // - // @param itr iterator holding the item - // - // @return valid handle to the parent item on success. This will be the last - // handle to the item - WriteHandle advanceIteratorAndTryEvictChainedItem(EvictionIterator& itr); + using EvictionIterator = typename MMContainer::LockedIterator; // Deserializer CacheAllocatorMetadata and verify the version // @@ -1756,22 +1743,23 @@ class CacheAllocator : public CacheBase { // @return true when successfully marked as moving, // fasle when this item has already been freed - bool markExclusiveForSlabRelease(const SlabReleaseContext& ctx, - void* alloc, - util::Throttler& throttler); + bool markMovingForSlabRelease(const SlabReleaseContext& ctx, + void* alloc, + util::Throttler& throttler); // "Move" (by copying) the content in this item to another memory // location by invoking the move callback. 
// // // @param ctx slab release context - // @param item old item to be moved elsewhere + // @param oldItem old item to be moved elsewhere + // @param handle handle to the item or to it's parent (if chained) // @param throttler slow this function down as not to take too much cpu // // @return true if the item has been moved // false if we have exhausted moving attempts bool moveForSlabRelease(const SlabReleaseContext& ctx, - Item& item, + Item& oldItem, util::Throttler& throttler); // "Move" (by copying) the content in this item to another memory @@ -1794,18 +1782,7 @@ class CacheAllocator : public CacheBase { Item& item, util::Throttler& throttler); - // Helper function to evict a normal item for slab release - // - // @return last handle for corresponding to item on success. empty handle on - // failure. caller can retry if needed. - WriteHandle evictNormalItemForSlabRelease(Item& item); - - // Helper function to evict a child item for slab release - // As a side effect, the parent item is also evicted - // - // @return last handle to the parent item of the child on success. empty - // handle on failure. caller can retry. - WriteHandle evictChainedItemForSlabRelease(ChainedItem& item); + typename NvmCacheT::PutToken createPutToken(Item& item); // Helper function to remove a item if expired. // @@ -1927,18 +1904,14 @@ class CacheAllocator : public CacheBase { std::optional saveNvmCache(); void saveRamCache(); - static bool itemExclusivePredicate(const Item& item) { - return item.getRefCount() == 0; + static bool itemSlabMovePredicate(const Item& item) { + return item.isMoving() && item.getRefCount() == 0; } static bool itemExpiryPredicate(const Item& item) { return item.getRefCount() == 1 && item.isExpired(); } - static bool parentEvictForSlabReleasePredicate(const Item& item) { - return item.getRefCount() == 1 && !item.isExclusive(); - } - std::unique_ptr createDeserializer(); // Execute func on each item. 
`func` can throw exception but must ensure diff --git a/cachelib/allocator/CacheItem-inl.h b/cachelib/allocator/CacheItem-inl.h index f59fa9d599..0028e2776a 100644 --- a/cachelib/allocator/CacheItem-inl.h +++ b/cachelib/allocator/CacheItem-inl.h @@ -148,15 +148,16 @@ std::string CacheItem::toString() const { return folly::sformat( "item: " "memory={}:raw-ref={}:size={}:key={}:hex-key={}:" - "isInMMContainer={}:isAccessible={}:isExclusive={}:references={}:ctime=" + "isInMMContainer={}:isAccessible={}:isMarkedForEviction={}:" + "isMoving={}:references={}:ctime=" "{}:" "expTime={}:updateTime={}:isNvmClean={}:isNvmEvicted={}:hasChainedItem=" "{}", this, getRefCountAndFlagsRaw(), getSize(), folly::humanify(getKey().str()), folly::hexlify(getKey()), - isInMMContainer(), isAccessible(), isExclusive(), getRefCount(), - getCreationTime(), getExpiryTime(), getLastAccessTime(), isNvmClean(), - isNvmEvicted(), hasChainedItem()); + isInMMContainer(), isAccessible(), isMarkedForEviction(), isMoving(), + getRefCount(), getCreationTime(), getExpiryTime(), getLastAccessTime(), + isNvmClean(), isNvmEvicted(), hasChainedItem()); } } @@ -217,23 +218,43 @@ bool CacheItem::isInMMContainer() const noexcept { } template -bool CacheItem::markExclusive() noexcept { - return ref_.markExclusive(); +bool CacheItem::markForEviction() noexcept { + return ref_.markForEviction(); } template -RefcountWithFlags::Value CacheItem::unmarkExclusive() noexcept { - return ref_.unmarkExclusive(); +RefcountWithFlags::Value CacheItem::unmarkForEviction() noexcept { + return ref_.unmarkForEviction(); } template -bool CacheItem::isExclusive() const noexcept { - return ref_.isExclusive(); +bool CacheItem::isMarkedForEviction() const noexcept { + return ref_.isMarkedForEviction(); } template -bool CacheItem::isOnlyExclusive() const noexcept { - return ref_.isOnlyExclusive(); +bool CacheItem::markForEvictionWhenMoving() { + return ref_.markForEvictionWhenMoving(); +} + +template +bool CacheItem::markMoving() { + return ref_.markMoving(); +} + +template +RefcountWithFlags::Value CacheItem::unmarkMoving() noexcept { + return ref_.unmarkMoving(); +} + +template +bool CacheItem::isMoving() const noexcept { + return ref_.isMoving(); +} + +template +bool CacheItem::isOnlyMoving() const noexcept { + return ref_.isOnlyMoving(); } template @@ -335,7 +356,7 @@ bool CacheItem::updateExpiryTime(uint32_t expiryTimeSecs) noexcept { // check for moving to make sure we are not updating the expiry time while at // the same time re-allocating the item with the old state of the expiry time // in moveRegularItem(). 
See D6852328
-  if (isExclusive() || !isInMMContainer() || isChainedItem()) {
+  if (isMoving() || isMarkedForEviction() || !isInMMContainer() || isChainedItem()) {
     return false;
   }
   // attempt to atomically update the value of expiryTime
@@ -451,12 +472,14 @@ std::string CacheChainedItem<CacheTrait>::toString() const {
   return folly::sformat(
       "chained item: "
       "memory={}:raw-ref={}:size={}:parent-compressed-ptr={}:"
-      "isInMMContainer={}:isAccessible={}:isExclusive={}:references={}:ctime={}"
+      "isInMMContainer={}:isAccessible={}:isMarkedForEviction={}:"
+      "isMoving={}:references={}:ctime={}"
       ":"
       "expTime={}:updateTime={}",
       this, Item::getRefCountAndFlagsRaw(), Item::getSize(), cPtr.getRaw(),
-      Item::isInMMContainer(), Item::isAccessible(), Item::isExclusive(),
-      Item::getRefCount(), Item::getCreationTime(), Item::getExpiryTime(),
+      Item::isInMMContainer(), Item::isAccessible(),
+      Item::isMarkedForEviction(), Item::isMoving(), Item::getRefCount(),
+      Item::getCreationTime(), Item::getExpiryTime(),
       Item::getLastAccessTime());
 }
 
diff --git a/cachelib/allocator/CacheItem.h b/cachelib/allocator/CacheItem.h
index 06136db032..afee315cbb 100644
--- a/cachelib/allocator/CacheItem.h
+++ b/cachelib/allocator/CacheItem.h
@@ -305,12 +305,17 @@ class CACHELIB_PACKED_ATTR CacheItem {
    */
   RefcountWithFlags::Value getRefCountAndFlagsRaw() const noexcept;
 
-  FOLLY_ALWAYS_INLINE void incRef() {
-    if (LIKELY(ref_.incRef())) {
-      return;
+  // Increments item's ref count
+  //
+  // @return true on success, false if item is marked as exclusive
+  // @throw exception::RefcountOverflow on ref count overflow
+  FOLLY_ALWAYS_INLINE bool incRef() {
+    try {
+      return ref_.incRef();
+    } catch (exception::RefcountOverflow& e) {
+      throw exception::RefcountOverflow(
+          folly::sformat("{} item: {}", e.what(), toString()));
     }
-    throw exception::RefcountOverflow(
-        folly::sformat("Refcount maxed out. item: {}", toString()));
   }
 
   FOLLY_ALWAYS_INLINE RefcountWithFlags::Value decRef() {
@@ -344,23 +349,43 @@ class CACHELIB_PACKED_ATTR CacheItem {
 
   /**
   * The following two functions corresond to whether or not an item is
-   * currently in the process of being moved. This happens during a slab
-   * rebalance, eviction or resize operation.
+   * currently in the process of being evicted.
   *
-   * An item can only be marked exclusive when `isInMMContainer` returns true.
+   * An item can only be marked exclusive when `isInMMContainer` returns true
+   * and item is not already exclusive nor moving and the ref count is 0.
   * This operation is atomic.
   *
-   * User can also query if an item "isOnlyExclusive". This returns true only
-   * if the refcount is 0 and only the exclusive bit is set.
-   *
-   * Unmarking exclusive does not depend on `isInMMContainer`.
+   * Unmarking exclusive does not depend on `isInMMContainer`
   * Unmarking exclusive will also return the refcount at the moment of
   * unmarking.
   */
-  bool markExclusive() noexcept;
-  RefcountWithFlags::Value unmarkExclusive() noexcept;
-  bool isExclusive() const noexcept;
-  bool isOnlyExclusive() const noexcept;
+  bool markForEviction() noexcept;
+  RefcountWithFlags::Value unmarkForEviction() noexcept;
+  bool isMarkedForEviction() const noexcept;
+
+  /**
+   * The following functions correspond to whether or not an item is
+   * currently in the process of being moved. When moving, ref count
+   * is always >= 1.
+   *
+   * An item can only be marked moving when `isInMMContainer` returns true
+   * and item is not already exclusive nor moving.
+   *
+   * User can also query if an item "isOnlyMoving".
This returns true only + * if the refcount is one and only the exclusive bit is set. + * + * Unmarking moving does not depend on `isInMMContainer` + * Unmarking moving will also return the refcount at the moment of + * unmarking. + */ + bool markMoving(); + RefcountWithFlags::Value unmarkMoving() noexcept; + bool isMoving() const noexcept; + bool isOnlyMoving() const noexcept; + + /** This function attempts to mark item as exclusive. + * Can only be called on the item that is moving.*/ + bool markForEvictionWhenMoving(); /** * Item cannot be marked both chained allocation and diff --git a/cachelib/allocator/MM2Q-inl.h b/cachelib/allocator/MM2Q-inl.h index d62b707179..ba388d40a4 100644 --- a/cachelib/allocator/MM2Q-inl.h +++ b/cachelib/allocator/MM2Q-inl.h @@ -241,29 +241,21 @@ bool MM2Q::Container::add(T& node) noexcept { } template T::*HookPtr> -typename MM2Q::Container::Iterator +typename MM2Q::Container::LockedIterator MM2Q::Container::getEvictionIterator() const noexcept { - // we cannot use combined critical sections with folly::DistributedMutex here - // because the lock is held for the lifetime of the eviction iterator. In - // other words, the abstraction of the iterator just does not lend itself well - // to combinable critical sections as the user can hold the lock for an - // arbitrary amount of time outside a lambda-friendly piece of code (eg. they - // can return the iterator from functions, pass it to functions, etc) - // - // it would be theoretically possible to refactor this interface into - // something like the following to allow combining - // - // mm2q.withEvictionIterator([&](auto iterator) { - // // user code - // }); - // - // at the time of writing it is unclear if the gains from combining are - // reasonable justification for the codemod required to achieve combinability - // as we don't expect this critical section to be the hotspot in user code. - // This is however subject to change at some time in the future as and when - // this assertion becomes false. 
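// Aside - the withEvictionIterator() refactor below adopts exactly the
// shape the old comment anticipated. folly::DistributedMutex::lock_combine
// executes the critical section on behalf of the caller (typically on the
// thread already holding the lock), which avoids a full lock handoff for
// short sections. Generic usage sketch, not cachelib-specific:
//
//   folly::DistributedMutex mutex;
//   int counter = 0;
//   int seen = mutex.lock_combine([&] {
//     ++counter;       // runs under the mutex
//     return counter;  // combined sections may return values
//   });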
LockHolder l(*lruMutex_); - return Iterator{std::move(l), lru_.rbegin()}; + return LockedIterator{std::move(l), lru_.rbegin()}; +} + +template T::*HookPtr> +template +void MM2Q::Container::withEvictionIterator(F&& fun) { + if (config_.useCombinedLockForIterators) { + lruMutex_->lock_combine([this, &fun]() { fun(Iterator{lru_.rbegin()}); }); + } else { + LockHolder lck{*lruMutex_}; + fun(Iterator{lru_.rbegin()}); + } } template T::*HookPtr> @@ -462,8 +454,9 @@ void MM2Q::Container::reconfigureLocked(const Time& currTime) { // Iterator Context Implementation template T::*HookPtr> -MM2Q::Container::Iterator::Iterator( - LockHolder l, const typename LruList::Iterator& iter) noexcept - : LruList::Iterator(iter), l_(std::move(l)) {} +MM2Q::Container::LockedIterator::LockedIterator( + LockHolder l, const Iterator& iter) noexcept + : Iterator(iter), l_(std::move(l)) {} + } // namespace cachelib } // namespace facebook diff --git a/cachelib/allocator/MM2Q.h b/cachelib/allocator/MM2Q.h index a3ffdb718e..9c5cac834c 100644 --- a/cachelib/allocator/MM2Q.h +++ b/cachelib/allocator/MM2Q.h @@ -68,6 +68,7 @@ class MM2Q { enum LruType { Warm, WarmTail, Hot, Cold, ColdTail, NumTypes }; // Config class for MM2Q + // TODO: implement support for useCombinedLockForIterators struct Config { // Create from serialized config explicit Config(SerializationConfigType configState) @@ -214,6 +215,50 @@ class MM2Q { size_t hotPercent, size_t coldPercent, uint32_t mmReconfigureInterval) + : Config(time, + ratio, + updateOnW, + updateOnR, + tryLockU, + rebalanceOnRecordAccs, + hotPercent, + coldPercent, + mmReconfigureInterval, + false) {} + + // @param time the LRU refresh time. + // An item will be promoted only once in each + // lru refresh time depite the + // number of accesses it gets. + // @param ratio the LRU refresh ratio. The ratio times the + // oldest element's lifetime in warm queue + // would be the minimum value of LRU refresh + // time. + // @param udpateOnW whether to promote the item on write + // @param updateOnR whether to promote the item on read + // @param tryLockU whether to use a try lock when doing + // update. + // @param rebalanceOnRecordAccs whether to do rebalance on access. If set + // to false, rebalance only happens when + // items are added or removed to the queue. + // @param hotPercent percentage number for the size of the hot + // queue in the overall size. + // @param coldPercent percentage number for the size of the cold + // queue in the overall size. + // @param mmReconfigureInterval Time interval for recalculating lru + // refresh time according to the ratio. + // useCombinedLockForIterators Whether to use combined locking for + // withEvictionIterator + Config(uint32_t time, + double ratio, + bool updateOnW, + bool updateOnR, + bool tryLockU, + bool rebalanceOnRecordAccs, + size_t hotPercent, + size_t coldPercent, + uint32_t mmReconfigureInterval, + bool useCombinedLockForIterators) : defaultLruRefreshTime(time), lruRefreshRatio(ratio), updateOnWrite(updateOnW), @@ -223,7 +268,8 @@ class MM2Q { hotSizePercent(hotPercent), coldSizePercent(coldPercent), mmReconfigureIntervalSecs( - std::chrono::seconds(mmReconfigureInterval)) { + std::chrono::seconds(mmReconfigureInterval)), + useCombinedLockForIterators(useCombinedLockForIterators) { checkLruSizes(); } @@ -306,6 +352,9 @@ class MM2Q { // Minimum interval between reconfigurations. If 0, reconfigure is never // called. std::chrono::seconds mmReconfigureIntervalSecs{}; + + // Whether to use combined locking for withEvictionIterator. 
+ bool useCombinedLockForIterators{false}; }; // The container object which can be used to keep track of objects of type @@ -347,22 +396,24 @@ class MM2Q { Container(const Container&) = delete; Container& operator=(const Container&) = delete; + using Iterator = typename LruList::Iterator; + // context for iterating the MM container. At any given point of time, // there can be only one iterator active since we need to lock the LRU for // iteration. we can support multiple iterators at same time, by using a // shared ptr in the context for the lock holder in the future. - class Iterator : public LruList::Iterator { + class LockedIterator : public Iterator { public: // noncopyable but movable. - Iterator(const Iterator&) = delete; - Iterator& operator=(const Iterator&) = delete; + LockedIterator(const LockedIterator&) = delete; + LockedIterator& operator=(const LockedIterator&) = delete; - Iterator(Iterator&&) noexcept = default; + LockedIterator(LockedIterator&&) noexcept = default; // 1. Invalidate this iterator // 2. Unlock void destroy() { - LruList::Iterator::reset(); + Iterator::reset(); if (l_.owns_lock()) { l_.unlock(); } @@ -373,15 +424,15 @@ class MM2Q { if (!l_.owns_lock()) { l_.lock(); } - LruList::Iterator::resetToBegin(); + Iterator::resetToBegin(); } private: // private because it's easy to misuse and cause deadlock for MM2Q - Iterator& operator=(Iterator&&) noexcept = default; + LockedIterator& operator=(LockedIterator&&) noexcept = default; // create an lru iterator with the lock being held. - Iterator(LockHolder l, const typename LruList::Iterator& iter) noexcept; + LockedIterator(LockHolder l, const Iterator& iter) noexcept; // only the container can create iterators friend Container; @@ -422,7 +473,7 @@ class MM2Q { // same as the above but uses an iterator context. The iterator is updated // on removal of the corresponding node to point to the next node. The - // iterator context holds the lock on the lru. + // iterator context is responsible for locking. // // iterator will be advanced to the next node after removing the node // @@ -445,7 +496,12 @@ class MM2Q { // Obtain an iterator that start from the tail and can be used // to search for evictions. This iterator holds a lock to this // container and only one such iterator can exist at a time - Iterator getEvictionIterator() const noexcept; + LockedIterator getEvictionIterator() const noexcept; + + // Execute provided function under container lock. Function gets + // Iterator passed as parameter. 
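// Usage sketch (illustrative; the argument values are placeholders, and the
// trailing flag is the one added by this patch):
//
//   MM2Q::Config cfg{/* lruRefreshTime */ 60, /* lruRefreshRatio */ 0.1,
//                    /* updateOnWrite */ false, /* updateOnRead */ true,
//                    /* tryLockUpdate */ false,
//                    /* rebalanceOnRecordAccess */ false,
//                    /* hotSizePercent */ 30, /* coldSizePercent */ 30,
//                    /* mmReconfigureInterval */ 0,
//                    /* useCombinedLockForIterators */ true};
//
//   container.withEvictionIterator([&](auto&& itr) {
//     while (itr) {  // iterate from the LRU tail under the container lock
//       ++itr;
//     }
//   });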
+ template + void withEvictionIterator(F&& f); // get the current config as a copy Config getConfig() const; diff --git a/cachelib/allocator/MMLru-inl.h b/cachelib/allocator/MMLru-inl.h index 75ba5037f6..d35759f212 100644 --- a/cachelib/allocator/MMLru-inl.h +++ b/cachelib/allocator/MMLru-inl.h @@ -212,10 +212,21 @@ bool MMLru::Container::add(T& node) noexcept { } template T::*HookPtr> -typename MMLru::Container::Iterator +typename MMLru::Container::LockedIterator MMLru::Container::getEvictionIterator() const noexcept { LockHolder l(*lruMutex_); - return Iterator{std::move(l), lru_.rbegin()}; + return LockedIterator{std::move(l), lru_.rbegin()}; +} + +template T::*HookPtr> +template +void MMLru::Container::withEvictionIterator(F&& fun) { + if (config_.useCombinedLockForIterators) { + lruMutex_->lock_combine([this, &fun]() { fun(Iterator{lru_.rbegin()}); }); + } else { + LockHolder lck{*lruMutex_}; + fun(Iterator{lru_.rbegin()}); + } } template T::*HookPtr> @@ -360,8 +371,9 @@ void MMLru::Container::reconfigureLocked(const Time& currTime) { // Iterator Context Implementation template T::*HookPtr> -MMLru::Container::Iterator::Iterator( - LockHolder l, const typename LruList::Iterator& iter) noexcept - : LruList::Iterator(iter), l_(std::move(l)) {} +MMLru::Container::LockedIterator::LockedIterator( + LockHolder l, const Iterator& iter) noexcept + : Iterator(iter), l_(std::move(l)) {} + } // namespace cachelib } // namespace facebook diff --git a/cachelib/allocator/MMLru.h b/cachelib/allocator/MMLru.h index c280242ae5..29c6d02689 100644 --- a/cachelib/allocator/MMLru.h +++ b/cachelib/allocator/MMLru.h @@ -145,6 +145,40 @@ class MMLru { bool tryLockU, uint8_t ipSpec, uint32_t mmReconfigureInterval) + : Config(time, + ratio, + updateOnW, + updateOnR, + tryLockU, + ipSpec, + mmReconfigureInterval, + false) {} + + // @param time the LRU refresh time in seconds. + // An item will be promoted only once in each lru refresh + // time depite the number of accesses it gets. + // @param ratio the lru refresh ratio. The ratio times the + // oldest element's lifetime in warm queue + // would be the minimum value of LRU refresh time. + // @param udpateOnW whether to promote the item on write + // @param updateOnR whether to promote the item on read + // @param tryLockU whether to use a try lock when doing update. + // @param ipSpec insertion point spec, which is the inverse power of + // length from the end of the queue. For example, value 1 + // means the insertion point is 1/2 from the end of LRU; + // value 2 means 1/4 from the end of LRU. + // @param mmReconfigureInterval Time interval for recalculating lru + // refresh time according to the ratio. + // useCombinedLockForIterators Whether to use combined locking for + // withEvictionIterator + Config(uint32_t time, + double ratio, + bool updateOnW, + bool updateOnR, + bool tryLockU, + uint8_t ipSpec, + uint32_t mmReconfigureInterval, + bool useCombinedLockForIterators) : defaultLruRefreshTime(time), lruRefreshRatio(ratio), updateOnWrite(updateOnW), @@ -152,7 +186,8 @@ class MMLru { tryLockUpdate(tryLockU), lruInsertionPointSpec(ipSpec), mmReconfigureIntervalSecs( - std::chrono::seconds(mmReconfigureInterval)) {} + std::chrono::seconds(mmReconfigureInterval)), + useCombinedLockForIterators(useCombinedLockForIterators) {} Config() = default; Config(const Config& rhs) = default; @@ -198,6 +233,9 @@ class MMLru { // Minimum interval between reconfigurations. If 0, reconfigure is never // called. 
std::chrono::seconds mmReconfigureIntervalSecs{}; + + // Whether to use combined locking for withEvictionIterator. + bool useCombinedLockForIterators{false}; }; // The container object which can be used to keep track of objects of type @@ -234,22 +272,24 @@ class MMLru { Container(const Container&) = delete; Container& operator=(const Container&) = delete; + using Iterator = typename LruList::Iterator; + // context for iterating the MM container. At any given point of time, // there can be only one iterator active since we need to lock the LRU for // iteration. we can support multiple iterators at same time, by using a // shared ptr in the context for the lock holder in the future. - class Iterator : public LruList::Iterator { + class LockedIterator : public Iterator { public: // noncopyable but movable. - Iterator(const Iterator&) = delete; - Iterator& operator=(const Iterator&) = delete; + LockedIterator(const LockedIterator&) = delete; + LockedIterator& operator=(const LockedIterator&) = delete; - Iterator(Iterator&&) noexcept = default; + LockedIterator(LockedIterator&&) noexcept = default; // 1. Invalidate this iterator // 2. Unlock void destroy() { - LruList::Iterator::reset(); + Iterator::reset(); if (l_.owns_lock()) { l_.unlock(); } @@ -260,15 +300,15 @@ class MMLru { if (!l_.owns_lock()) { l_.lock(); } - LruList::Iterator::resetToBegin(); + Iterator::resetToBegin(); } private: // private because it's easy to misuse and cause deadlock for MMLru - Iterator& operator=(Iterator&&) noexcept = default; + LockedIterator& operator=(LockedIterator&&) noexcept = default; // create an lru iterator with the lock being held. - Iterator(LockHolder l, const typename LruList::Iterator& iter) noexcept; + LockedIterator(LockHolder l, const Iterator& iter) noexcept; // only the container can create iterators friend Container; @@ -307,10 +347,9 @@ class MMLru { // state of node is unchanged. bool remove(T& node) noexcept; - using Iterator = Iterator; // same as the above but uses an iterator context. The iterator is updated // on removal of the corresponding node to point to the next node. The - // iterator context holds the lock on the lru. + // iterator context is responsible for locking. // // iterator will be advanced to the next node after removing the node // @@ -330,7 +369,12 @@ class MMLru { // Obtain an iterator that start from the tail and can be used // to search for evictions. This iterator holds a lock to this // container and only one such iterator can exist at a time - Iterator getEvictionIterator() const noexcept; + LockedIterator getEvictionIterator() const noexcept; + + // Execute provided function under container lock. Function gets + // iterator passed as parameter. 
+ template + void withEvictionIterator(F&& f); // get copy of current config Config getConfig() const; diff --git a/cachelib/allocator/MMTinyLFU-inl.h b/cachelib/allocator/MMTinyLFU-inl.h index 59c72c1720..08f848d656 100644 --- a/cachelib/allocator/MMTinyLFU-inl.h +++ b/cachelib/allocator/MMTinyLFU-inl.h @@ -214,10 +214,18 @@ bool MMTinyLFU::Container::add(T& node) noexcept { } template T::*HookPtr> -typename MMTinyLFU::Container::Iterator +typename MMTinyLFU::Container::LockedIterator MMTinyLFU::Container::getEvictionIterator() const noexcept { LockHolder l(lruMutex_); - return Iterator{std::move(l), *this}; + return LockedIterator{std::move(l), *this}; +} + +template T::*HookPtr> +template +void MMTinyLFU::Container::withEvictionIterator(F&& fun) { + // TinyLFU uses spin lock which does not support combined locking + LockHolder lck{lruMutex_}; + fun(Iterator{*this}); } template T::*HookPtr> @@ -347,13 +355,18 @@ void MMTinyLFU::Container::reconfigureLocked(const Time& currTime) { lruRefreshTime_.store(lruRefreshTime, std::memory_order_relaxed); } +// Locked Iterator Context Implementation +template T::*HookPtr> +MMTinyLFU::Container::LockedIterator::LockedIterator( + LockHolder l, const Container& c) noexcept + : Iterator(c), l_(std::move(l)) {} + // Iterator Context Implementation template T::*HookPtr> MMTinyLFU::Container::Iterator::Iterator( - LockHolder l, const Container& c) noexcept + const Container& c) noexcept : c_(c), tIter_(c.lru_.getList(LruType::Tiny).rbegin()), - mIter_(c.lru_.getList(LruType::Main).rbegin()), - l_(std::move(l)) {} + mIter_(c.lru_.getList(LruType::Main).rbegin()) {} } // namespace cachelib } // namespace facebook diff --git a/cachelib/allocator/MMTinyLFU.h b/cachelib/allocator/MMTinyLFU.h index 690b5be8b3..a090ad41bb 100644 --- a/cachelib/allocator/MMTinyLFU.h +++ b/cachelib/allocator/MMTinyLFU.h @@ -339,7 +339,7 @@ class MMTinyLFU { class Iterator; // same as the above but uses an iterator context. The iterator is updated // on removal of the corresponding node to point to the next node. The - // iterator context holds the lock on the lru. + // iterator context is responsible for locking. // // iterator will be advanced to the next node after removing the node // @@ -356,10 +356,7 @@ class MMTinyLFU { // source node already existed. bool replace(T& oldNode, T& newNode) noexcept; - // context for iterating the MM container. At any given point of time, - // there can be only one iterator active since we need to lock the LRU for - // iteration. we can support multiple iterators at same time, by using a - // shared ptr in the context for the lock holder in the future. + // context for iterating the MM container. class Iterator { public: using ListIterator = typename LruList::DListIterator; @@ -367,6 +364,7 @@ class MMTinyLFU { Iterator(const Iterator&) = delete; Iterator& operator=(const Iterator&) = delete; Iterator(Iterator&&) noexcept = default; + Iterator& operator=(Iterator&&) noexcept = default; Iterator& operator++() noexcept { ++getIter(); @@ -401,30 +399,17 @@ class MMTinyLFU { mIter_.reset(); } - // 1. Invalidate this iterator - // 2. 
Unlock - void destroy() { - reset(); - if (l_.owns_lock()) { - l_.unlock(); - } - } + // Invalidate this iterator + void destroy() { reset(); } // Reset this iterator to the beginning void resetToBegin() { - if (!l_.owns_lock()) { - l_.lock(); - } tIter_.resetToBegin(); mIter_.resetToBegin(); } private: - // private because it's easy to misuse and cause deadlock for MMTinyLFU - Iterator& operator=(Iterator&&) noexcept = default; - - // create an lru iterator with the lock being held. - explicit Iterator(LockHolder l, const Container& c) noexcept; + explicit Iterator(const Container& c) noexcept; const ListIterator& getIter() const noexcept { return evictTiny() ? tIter_ : mIter_; @@ -456,6 +441,40 @@ class MMTinyLFU { // Tiny and main cache iterators ListIterator tIter_; ListIterator mIter_; + }; + + class LockedIterator : public Iterator { + public: + // noncopyable but movable. + LockedIterator(const LockedIterator&) = delete; + LockedIterator& operator=(const LockedIterator&) = delete; + LockedIterator(LockedIterator&&) noexcept = default; + + // 1. Invalidate this iterator + // 2. Unlock + void destroy() { + Iterator::reset(); + if (l_.owns_lock()) { + l_.unlock(); + } + } + + // Reset this iterator to the beginning + void resetToBegin() { + if (!l_.owns_lock()) { + l_.lock(); + } + Iterator::resetToBegin(); + } + + private: + // only the container can create iterators + friend Container; + + // create an lru iterator with the lock being held. + explicit LockedIterator(LockHolder l, + const Container& c) noexcept; + // lock protecting the validity of the iterator LockHolder l_; }; @@ -489,7 +508,12 @@ class MMTinyLFU { // Obtain an iterator that start from the tail and can be used // to search for evictions. This iterator holds a lock to this // container and only one such iterator can exist at a time - Iterator getEvictionIterator() const noexcept; + LockedIterator getEvictionIterator() const noexcept; + + // Execute provided function under container lock. Function gets + // iterator passed as parameter. + template + void withEvictionIterator(F&& f); // for saving the state of the lru // diff --git a/cachelib/allocator/Refcount.h b/cachelib/allocator/Refcount.h index 3333762dbc..fb27febd3f 100644 --- a/cachelib/allocator/Refcount.h +++ b/cachelib/allocator/Refcount.h @@ -132,32 +132,28 @@ class FOLLY_PACK_ATTR RefcountWithFlags { RefcountWithFlags& operator=(RefcountWithFlags&&) = delete; // Bumps up the reference count only if the new count will be strictly less - // than or equal to the maxCount. - // @return true if refcount is bumped. false otherwise. - FOLLY_ALWAYS_INLINE bool incRef() noexcept { - Value* const refPtr = &refCount_; - unsigned int nCASFailures = 0; - constexpr bool isWeak = false; - Value oldVal = __atomic_load_n(refPtr, __ATOMIC_RELAXED); - - while (true) { - const Value newCount = oldVal + static_cast(1); - if (UNLIKELY((oldVal & kAccessRefMask) == (kAccessRefMask))) { - return false; + // than or equal to the maxCount and the item is not exclusive + // @return true if refcount is bumped. 
false otherwise (if the item is marked for eviction)
+  // @throw exception::RefcountOverflow if the new count would be greater
+  //        than maxCount
+  FOLLY_ALWAYS_INLINE bool incRef() {
+    auto predicate = [](const Value curValue) {
+      Value bitMask = getAdminRef<kExclusive>();
+
+      const bool exclusiveBitIsSet = curValue & bitMask;
+      if (UNLIKELY((curValue & kAccessRefMask) == (kAccessRefMask))) {
+        throw exception::RefcountOverflow("Refcount maxed out.");
       }
-      if (__atomic_compare_exchange_n(refPtr, &oldVal, newCount, isWeak,
-                                      __ATOMIC_ACQ_REL, __ATOMIC_RELAXED)) {
-        return true;
-      }
+      // Check that the item is not marked for eviction
+      return !exclusiveBitIsSet || ((curValue & kAccessRefMask) != 0);
+    };
 
-      if ((++nCASFailures % 4) == 0) {
-        // this pause takes up to 40 clock cycles on intel and the lock cmpxchgl
-        // above should take about 100 clock cycles. we pause once every 400
-        // cycles or so if we are extremely unlucky.
-        folly::asm_volatile_pause();
-      }
-    }
+    auto newValue = [](const Value curValue) {
+      return (curValue + static_cast<Value>(1));
+    };
+
+    return atomicUpdateValue(predicate, newValue);
   }
 
   // Bumps down the reference count
@@ -167,33 +163,38 @@ class FOLLY_PACK_ATTR RefcountWithFlags {
   // @throw RefcountUnderflow when we are trying to decrement from 0
   // refcount and have a refcount leak.
   FOLLY_ALWAYS_INLINE Value decRef() {
-    Value* const refPtr = &refCount_;
-    unsigned int nCASFailures = 0;
-    constexpr bool isWeak = false;
-
-    Value oldVal = __atomic_load_n(refPtr, __ATOMIC_RELAXED);
-    while (true) {
-      const Value newCount = oldVal - static_cast<Value>(1);
-      if ((oldVal & kAccessRefMask) == 0) {
+    auto predicate = [](const Value curValue) {
+      if ((curValue & kAccessRefMask) == 0) {
         throw exception::RefcountUnderflow(
             "Trying to decRef with no refcount. RefCount Leak!");
       }
+      return true;
+    };
 
-      if (__atomic_compare_exchange_n(refPtr, &oldVal, newCount, isWeak,
-                                      __ATOMIC_ACQ_REL, __ATOMIC_RELAXED)) {
-        return newCount & kRefMask;
-      }
-      if ((++nCASFailures % 4) == 0) {
-        // this pause takes up to 40 clock cycles on intel and the lock cmpxchgl
-        // above should take about 100 clock cycles. we pause once every 400
-        // cycles or so if we are extremely unlucky
-        folly::asm_volatile_pause();
-      }
-    }
+    Value retValue;
+    auto newValue = [&retValue](const Value curValue) {
+      retValue = (curValue - static_cast<Value>(1));
+      return retValue;
+    };
+
+    auto updated = atomicUpdateValue(predicate, newValue);
+    XDCHECK(updated);
+
+    return retValue & kRefMask;
   }
 
-  // Return refcount excluding control bits and flags
-  Value getAccessRef() const noexcept { return getRaw() & kAccessRefMask; }
+  // Return refcount excluding the moving refcount, control bits and flags.
+  Value getAccessRef() const noexcept {
+    auto raw = getRaw();
+    auto accessRef = raw & kAccessRefMask;
+
+    if ((raw & getAdminRef<kExclusive>()) && accessRef >= 1) {
+      // if the item is moving, ignore the extra ref
+      return accessRef - static_cast<Value>(1);
+    } else {
+      return accessRef;
+    }
+  }
 
   // Return access ref and the admin ref bits
   Value getRefWithAccessAndAdmin() const noexcept {
@@ -246,65 +247,143 @@ class FOLLY_PACK_ATTR RefcountWithFlags {
   }
 
   /**
-   * The following four functions are used to track whether or not
-   * an item is currently in the process of being moved. This happens during a
-   * slab rebalance or resize operation or during eviction.
+   * The following two functions correspond to whether or not an item is
+   * currently in the process of being evicted. When an item is marked for
+   * eviction, the `kExclusive` bit is set and the ref count is zero.
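+   *
+   * For example (illustrative only): once markForEviction() succeeds, a
+   * concurrent incRef() observes the `kExclusive` bit with a zero access
+   * ref count and returns false, so no new references can be handed out
+   * while the item is being torn down.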
 *
-   * An item can only be marked exclusive when `isInMMContainer` returns true
-   * and the item is not yet marked as exclusive. This operation is atomic.
+   * An item can only be marked for eviction when `isInMMContainer` and
+   * `isAccessible` return true, the item is not already marked for eviction
+   * or moving, and the ref count is 0. This operation is atomic.
    *
-   * User can also query if an item "isOnlyExclusive". This returns true only
-   * if the refcount is 0 and only the exclusive bit is set.
-   *
-   * Unmarking exclusive does not depend on `isInMMContainer`.
-   * Unmarking exclusive will also return the refcount at the moment of
-   * unmarking.
+   * Unmarking eviction does not depend on `isInMMContainer` or `isAccessible`.
    */
-  bool markExclusive() noexcept {
-    Value bitMask = getAdminRef<kExclusive>();
-    Value conditionBitMask = getAdminRef<kLinked>();
-
-    Value* const refPtr = &refCount_;
-    unsigned int nCASFailures = 0;
-    constexpr bool isWeak = false;
-    Value curValue = __atomic_load_n(refPtr, __ATOMIC_RELAXED);
-    while (true) {
+  bool markForEviction() noexcept {
+    auto predicate = [](const Value curValue) {
+      Value conditionBitMask = getAdminRef<kLinked>();
       const bool flagSet = curValue & conditionBitMask;
-      const bool alreadyExclusive = curValue & bitMask;
+      const bool alreadyExclusive = curValue & getAdminRef<kExclusive>();
+      const bool accessible = curValue & getAdminRef<kAccessible>();
+
       if (!flagSet || alreadyExclusive) {
         return false;
       }
-
-      const Value newValue = curValue | bitMask;
-      if (__atomic_compare_exchange_n(refPtr, &curValue, newValue, isWeak,
-                                      __ATOMIC_ACQ_REL, __ATOMIC_RELAXED)) {
-        XDCHECK(newValue & conditionBitMask);
-        return true;
+      if ((curValue & kAccessRefMask) != 0) {
+        return false;
       }
-
-      if ((++nCASFailures % 4) == 0) {
-        // this pause takes up to 40 clock cycles on intel and the lock cmpxchgl
-        // above should take about 100 clock cycles. we pause once every 400
-        // cycles or so if we are extremely unlucky.
-        folly::asm_volatile_pause();
+      if (!accessible) {
+        return false;
       }
-    }
+
+      return true;
+    };
+
+    auto newValue = [](const Value curValue) {
+      return curValue | getAdminRef<kExclusive>();
+    };
+
+    return atomicUpdateValue(predicate, newValue);
   }
 
-  Value unmarkExclusive() noexcept {
+  Value unmarkForEviction() noexcept {
+    XDCHECK(isMarkedForEviction());
     Value bitMask = ~getAdminRef<kExclusive>();
     return __atomic_and_fetch(&refCount_, bitMask, __ATOMIC_ACQ_REL) &
            kRefMask;
   }
 
-  bool isExclusive() const noexcept {
-    return getRaw() & getAdminRef<kExclusive>();
+  bool isMarkedForEviction() const noexcept {
+    auto raw = getRaw();
+    return (raw & getAdminRef<kExclusive>()) && ((raw & kAccessRefMask) == 0);
   }
 
-  bool isOnlyExclusive() const noexcept {
-    // An item is only exclusive when its refcount is zero and only the
-    // exclusive bit among all the control bits is set. This indicates an item
-    // is exclusive to the current thread. No other thread is allowed to
-    // do anything with it.
+  /**
+   * The following functions correspond to whether or not an item is
+   * currently in the process of being moved. When moving, the internal
+   * ref count is always >= 1 and the `kExclusive` bit is set. getRefCount
+   * does not return the extra ref (it can return 0).
+   *
+   * An item can only be marked moving when `isInMMContainer` returns true
+   * and the item is not already marked for eviction or moving.
+   *
+   * User can also query if an item "isOnlyMoving". This returns true only
+   * if the refcount is one and only the exclusive bit is set.
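+   *
+   * For example (illustrative only): after a successful markMoving(), the
+   * raw count holds one hidden ref, so getAccessRef() still reports 0 while
+   * isMoving() returns true, and concurrent incRef() calls continue to
+   * succeed because the access ref count is non-zero.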
+   *
+   * Unmarking moving does not depend on `isInMMContainer`.
+   */
+  bool markMoving() {
+    auto predicate = [](const Value curValue) {
+      Value conditionBitMask = getAdminRef<kLinked>();
+      const bool flagSet = curValue & conditionBitMask;
+      const bool alreadyExclusive = curValue & getAdminRef<kExclusive>();
+
+      if (!flagSet || alreadyExclusive) {
+        return false;
+      }
+      if (UNLIKELY((curValue & kAccessRefMask) == (kAccessRefMask))) {
+        throw exception::RefcountOverflow("Refcount maxed out.");
+      }
+
+      return true;
+    };
+
+    auto newValue = [](const Value curValue) {
+      // Set the exclusive flag and make the ref count non-zero (to
+      // distinguish this from the marked-for-eviction case). This extra
+      // ref will not be reported to the user.
+      return (curValue + static_cast<Value>(1)) | getAdminRef<kExclusive>();
+    };
+
+    return atomicUpdateValue(predicate, newValue);
+  }
+
+  Value unmarkMoving() noexcept {
+    XDCHECK(isMoving());
+    auto predicate = [](const Value curValue) {
+      XDCHECK((curValue & kAccessRefMask) != 0);
+      return true;
+    };
+
+    Value retValue;
+    auto newValue = [&retValue](const Value curValue) {
+      retValue =
+          (curValue - static_cast<Value>(1)) & ~getAdminRef<kExclusive>();
+      return retValue;
+    };
+
+    auto updated = atomicUpdateValue(predicate, newValue);
+    XDCHECK(updated);
+
+    return retValue & kRefMask;
+  }
+
+  bool isMoving() const noexcept {
+    auto raw = getRaw();
+    return (raw & getAdminRef<kExclusive>()) && ((raw & kAccessRefMask) != 0);
+  }
+
+  /** This function attempts to mark the item for eviction.
+   *  It can only be called on an item that is moving. */
+  bool markForEvictionWhenMoving() {
+    XDCHECK(isMoving());
+
+    auto predicate = [](const Value curValue) {
+      return (curValue & kAccessRefMask) == 1;
+    };
+
+    auto newValue = [](const Value curValue) {
+      XDCHECK((curValue & kAccessRefMask) == 1);
+      return (curValue - static_cast<Value>(1));
+    };
+
+    return atomicUpdateValue(predicate, newValue);
+  }
+
+  bool isOnlyMoving() const noexcept {
+    // An item is only moving when its refcount is one and only the exclusive
+    // bit among all the control bits is set. This indicates an item that is
+    // already on its way out of the cache and does not need to be moved.
    auto ref = getRefWithAccessAndAdmin();
-    bool anyOtherBitSet = ref & ~getAdminRef<kExclusive>();
-    if (anyOtherBitSet) {
+    Value valueWithoutExclusiveBit = ref & ~getAdminRef<kExclusive>();
+    if (valueWithoutExclusiveBit != 1) {
      return false;
    }
    return ref & getAdminRef<kExclusive>();
@@ -370,6 +449,39 @@ class FOLLY_PACK_ATTR RefcountWithFlags {
   }
 
  private:
+  /**
+   * Helper function to modify refCount_ atomically.
+   *
+   * If predicate(currentValue) is true, it atomically assigns the result of
+   * newValueF(currentValue) to refCount_ and returns true. Otherwise it
+   * returns false and leaves refCount_ unmodified.
+   */
+  template <typename P, typename F>
+  bool atomicUpdateValue(P&& predicate, F&& newValueF) {
+    Value* const refPtr = &refCount_;
+    unsigned int nCASFailures = 0;
+    constexpr bool isWeak = false;
+    Value curValue = __atomic_load_n(refPtr, __ATOMIC_RELAXED);
+    while (true) {
+      if (!predicate(curValue)) {
+        return false;
+      }
+
+      const Value newValue = newValueF(curValue);
+      if (__atomic_compare_exchange_n(refPtr, &curValue, newValue, isWeak,
+                                      __ATOMIC_ACQ_REL, __ATOMIC_RELAXED)) {
+        return true;
+      }
+
+      if ((++nCASFailures % 4) == 0) {
+        // this pause takes up to 40 clock cycles on intel and the lock cmpxchgl
+        // above should take about 100 clock cycles. we pause once every 400
+        // cycles or so if we are extremely unlucky.
+ folly::asm_volatile_pause(); + } + } + } + template static Value getFlag() noexcept { static_assert(flagBit >= kNumAccessRefBits + kNumAdminRefBits, diff --git a/cachelib/allocator/nvmcache/NvmCache-inl.h b/cachelib/allocator/nvmcache/NvmCache-inl.h index b79712e607..3cef6b7ad0 100644 --- a/cachelib/allocator/nvmcache/NvmCache-inl.h +++ b/cachelib/allocator/nvmcache/NvmCache-inl.h @@ -460,19 +460,18 @@ uint32_t NvmCache::getStorageSizeInNvm(const Item& it) { } template -std::unique_ptr NvmCache::makeNvmItem(const WriteHandle& hdl) { - const auto& item = *hdl; +std::unique_ptr NvmCache::makeNvmItem(const Item& item) { auto poolId = cache_.getAllocInfo((void*)(&item)).poolId; if (item.isChainedItem()) { throw std::invalid_argument(folly::sformat( - "Chained item can not be flushed separately {}", hdl->toString())); + "Chained item can not be flushed separately {}", item.toString())); } auto chainedItemRange = - CacheAPIWrapperForNvm::viewAsChainedAllocsRange(cache_, *hdl); + CacheAPIWrapperForNvm::viewAsChainedAllocsRange(cache_, item); if (config_.encodeCb && !config_.encodeCb(EncodeDecodeContext{ - *(hdl.getInternal()), chainedItemRange})) { + const_cast(item), chainedItemRange})) { return nullptr; } @@ -496,12 +495,10 @@ std::unique_ptr NvmCache::makeNvmItem(const WriteHandle& hdl) { } template -void NvmCache::put(WriteHandle& hdl, PutToken token) { +void NvmCache::put(Item& item, PutToken token) { util::LatencyTracker tracker(stats().nvmInsertLatency_); - HashedKey hk{hdl->getKey()}; + HashedKey hk{item.getKey()}; - XDCHECK(hdl); - auto& item = *hdl; // for regular items that can only write to nvmcache upon eviction, we // should not be recording a write for an nvmclean item unless it is marked // as evicted from nvmcache. @@ -526,7 +523,7 @@ void NvmCache::put(WriteHandle& hdl, PutToken token) { return; } - auto nvmItem = makeNvmItem(hdl); + auto nvmItem = makeNvmItem(item); if (!nvmItem) { stats().numNvmPutEncodeFailure.inc(); return; diff --git a/cachelib/allocator/nvmcache/NvmCache.h b/cachelib/allocator/nvmcache/NvmCache.h index adcc8f200a..23a47f8a67 100644 --- a/cachelib/allocator/nvmcache/NvmCache.h +++ b/cachelib/allocator/nvmcache/NvmCache.h @@ -158,11 +158,11 @@ class NvmCache { PutToken createPutToken(folly::StringPiece key); // store the given item in navy - // @param hdl handle to cache item. should not be null + // @param item cache item // @param token the put token for the item. this must have been // obtained before enqueueing the put to maintain // consistency - void put(WriteHandle& hdl, PutToken token); + void put(Item& item, PutToken token); // returns the current state of whether nvmcache is enabled or not. nvmcache // can be disabled if the backend implementation ends up in a corrupt state @@ -286,7 +286,7 @@ class NvmCache { // returns true if there is tombstone entry for the key. bool hasTombStone(HashedKey hk); - std::unique_ptr makeNvmItem(const WriteHandle& handle); + std::unique_ptr makeNvmItem(const Item& item); // wrap an item into a blob for writing into navy. 
Blob makeBlob(const Item& it); diff --git a/cachelib/allocator/nvmcache/tests/NvmTestBase.h b/cachelib/allocator/nvmcache/tests/NvmTestBase.h index fd88875fa9..70f00f2e52 100644 --- a/cachelib/allocator/nvmcache/tests/NvmTestBase.h +++ b/cachelib/allocator/nvmcache/tests/NvmTestBase.h @@ -108,7 +108,7 @@ class NvmCacheTest : public testing::Test { void pushToNvmCacheFromRamForTesting(WriteHandle& handle) { auto nvmCache = getNvmCache(); if (nvmCache) { - nvmCache->put(handle, nvmCache->createPutToken(handle->getKey())); + nvmCache->put(*handle, nvmCache->createPutToken(handle->getKey())); } } @@ -127,7 +127,7 @@ class NvmCacheTest : public testing::Test { } std::unique_ptr makeNvmItem(const WriteHandle& handle) { - return getNvmCache()->makeNvmItem(handle); + return getNvmCache()->makeNvmItem(*handle); } std::unique_ptr createItemAsIOBuf(folly::StringPiece key, diff --git a/cachelib/allocator/tests/BaseAllocatorTest.h b/cachelib/allocator/tests/BaseAllocatorTest.h index 4b29610a18..de0a1d9cf2 100644 --- a/cachelib/allocator/tests/BaseAllocatorTest.h +++ b/cachelib/allocator/tests/BaseAllocatorTest.h @@ -4080,15 +4080,16 @@ class BaseAllocatorTest : public AllocatorTest { // Check that item is in the expected container. bool findItem(AllocatorT& allocator, typename AllocatorT::Item* item) { auto& container = allocator.getMMContainer(*item); - auto itr = container.getEvictionIterator(); bool found = false; - while (itr) { - if (itr.get() == item) { - found = true; - break; + container.withEvictionIterator([&found, &item](auto&& itr) { + while (itr) { + if (itr.get() == item) { + found = true; + break; + } + ++itr; } - ++itr; - } + }); return found; } @@ -5476,8 +5477,12 @@ class BaseAllocatorTest : public AllocatorTest { ASSERT_TRUE(big->isInMMContainer()); auto& mmContainer = alloc.getMMContainer(*big); - auto itr = mmContainer.getEvictionIterator(); - ASSERT_EQ(big.get(), &(*itr)); + + typename AllocatorT::Item* evictionCandidate = nullptr; + mmContainer.withEvictionIterator( + [&evictionCandidate](auto&& itr) { evictionCandidate = itr.get(); }); + + ASSERT_EQ(big.get(), evictionCandidate); alloc.remove("hello"); } @@ -5491,8 +5496,11 @@ class BaseAllocatorTest : public AllocatorTest { ASSERT_TRUE(small2->isInMMContainer()); auto& mmContainer = alloc.getMMContainer(*small2); - auto itr = mmContainer.getEvictionIterator(); - ASSERT_EQ(small2.get(), &(*itr)); + + typename AllocatorT::Item* evictionCandidate = nullptr; + mmContainer.withEvictionIterator( + [&evictionCandidate](auto&& itr) { evictionCandidate = itr.get(); }); + ASSERT_EQ(small2.get(), evictionCandidate); alloc.remove("hello"); } diff --git a/cachelib/allocator/tests/ItemTest.cpp b/cachelib/allocator/tests/ItemTest.cpp index b0f3a2fdec..70dd1277fe 100644 --- a/cachelib/allocator/tests/ItemTest.cpp +++ b/cachelib/allocator/tests/ItemTest.cpp @@ -83,10 +83,20 @@ TEST(ItemTest, ExpiryTime) { EXPECT_EQ(tenMins, item->getConfiguredTTL()); // Test that writes fail while the item is moving - item->markExclusive(); + result = item->markMoving(); + EXPECT_TRUE(result); + result = item->updateExpiryTime(0); + EXPECT_FALSE(result); + item->unmarkMoving(); + + // Test that writes fail while the item is marked for eviction + item->markAccessible(); + result = item->markForEviction(); + EXPECT_TRUE(result); result = item->updateExpiryTime(0); EXPECT_FALSE(result); - item->unmarkExclusive(); + item->unmarkForEviction(); + item->unmarkAccessible(); // Test that writes fail while the item is not in an MMContainer item->unmarkInMMContainer(); 
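Taken together, the admin-bit changes above admit the following lifecycle. This is an illustrative sketch only (it is not part of the patch), using just the RefcountWithFlags calls shown in the hunks above:

    RefcountWithFlags ref;
    ref.markInMMContainer(); // linked into an MM container
    ref.markAccessible();    // visible in the access container

    ref.markMoving();        // sets kExclusive plus one hidden internal ref
    // isMoving() == true; getAccessRef() == 0 because the hidden ref is
    // not reported to users

    ref.markForEvictionWhenMoving(); // drops the hidden ref, keeps kExclusive
    // isMarkedForEviction() == true; isMoving() == false

    ref.unmarkForEviction(); // clears kExclusive; the returned value still
                             // carries the admin bits (kLinked, kAccessible)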
diff --git a/cachelib/allocator/tests/RefCountTest.cpp b/cachelib/allocator/tests/RefCountTest.cpp index b355a48a8e..d05be08c31 100644 --- a/cachelib/allocator/tests/RefCountTest.cpp +++ b/cachelib/allocator/tests/RefCountTest.cpp @@ -30,6 +30,7 @@ class RefCountTest : public AllocTestBase { public: static void testMultiThreaded(); static void testBasic(); + static void testMarkForEvictionAndMoving(); }; void RefCountTest::testMultiThreaded() { @@ -81,7 +82,7 @@ void RefCountTest::testBasic() { ASSERT_EQ(0, ref.getRaw()); ASSERT_FALSE(ref.isInMMContainer()); ASSERT_FALSE(ref.isAccessible()); - ASSERT_FALSE(ref.isExclusive()); + ASSERT_FALSE(ref.isMoving()); ASSERT_FALSE(ref.template isFlagSet()); ASSERT_FALSE(ref.template isFlagSet()); @@ -89,7 +90,7 @@ void RefCountTest::testBasic() { ref.markInMMContainer(); ASSERT_TRUE(ref.isInMMContainer()); ASSERT_FALSE(ref.isAccessible()); - ASSERT_FALSE(ref.isExclusive()); + ASSERT_FALSE(ref.isMoving()); ASSERT_EQ(0, ref.getAccessRef()); ASSERT_FALSE(ref.template isFlagSet()); ASSERT_FALSE(ref.template isFlagSet()); @@ -105,13 +106,13 @@ void RefCountTest::testBasic() { // Incrementing past the max will fail auto rawRef = ref.getRaw(); - ASSERT_FALSE(ref.incRef()); + ASSERT_THROW(ref.incRef(), std::overflow_error); ASSERT_EQ(rawRef, ref.getRaw()); // Bumping up access ref shouldn't affect admin ref and flags ASSERT_TRUE(ref.isInMMContainer()); ASSERT_FALSE(ref.isAccessible()); - ASSERT_FALSE(ref.isExclusive()); + ASSERT_FALSE(ref.isMoving()); ASSERT_EQ(RefcountWithFlags::kAccessRefMask, ref.getAccessRef()); ASSERT_TRUE(ref.template isFlagSet()); ASSERT_FALSE(ref.template isFlagSet()); @@ -128,7 +129,7 @@ void RefCountTest::testBasic() { // Bumping down access ref shouldn't affect admin ref and flags ASSERT_TRUE(ref.isInMMContainer()); ASSERT_FALSE(ref.isAccessible()); - ASSERT_FALSE(ref.isExclusive()); + ASSERT_FALSE(ref.isMoving()); ASSERT_EQ(0, ref.getAccessRef()); ASSERT_TRUE(ref.template isFlagSet()); ASSERT_FALSE(ref.template isFlagSet()); @@ -136,7 +137,7 @@ void RefCountTest::testBasic() { ref.template unSetFlag(); ASSERT_TRUE(ref.isInMMContainer()); ASSERT_FALSE(ref.isAccessible()); - ASSERT_FALSE(ref.isExclusive()); + ASSERT_FALSE(ref.isMoving()); ASSERT_EQ(0, ref.getAccessRef()); ASSERT_FALSE(ref.template isFlagSet()); ASSERT_FALSE(ref.template isFlagSet()); @@ -145,33 +146,119 @@ void RefCountTest::testBasic() { ASSERT_EQ(0, ref.getRaw()); ASSERT_FALSE(ref.isInMMContainer()); ASSERT_FALSE(ref.isAccessible()); - ASSERT_FALSE(ref.isExclusive()); + ASSERT_FALSE(ref.isMoving()); ASSERT_EQ(0, ref.getAccessRef()); ASSERT_FALSE(ref.template isFlagSet()); ASSERT_FALSE(ref.template isFlagSet()); // conditionally set flags - ASSERT_FALSE((ref.markExclusive())); + ASSERT_FALSE((ref.markMoving())); ref.markInMMContainer(); - ASSERT_TRUE((ref.markExclusive())); - ASSERT_FALSE((ref.isOnlyExclusive())); + // only first one succeeds + ASSERT_TRUE((ref.markMoving())); + ASSERT_FALSE((ref.markMoving())); ref.unmarkInMMContainer(); + ref.template setFlag(); - // Have no other admin refcount but with a flag still means "isOnlyExclusive" - ASSERT_TRUE((ref.isOnlyExclusive())); + // Have no other admin refcount but with a flag still means "isOnlyMoving" + ASSERT_TRUE((ref.isOnlyMoving())); - // Set some flags and verify that "isOnlyExclusive" does not care about flags + // Set some flags and verify that "isOnlyMoving" does not care about flags ref.markIsChainedItem(); ASSERT_TRUE(ref.isChainedItem()); - ASSERT_TRUE((ref.isOnlyExclusive())); + 
ASSERT_TRUE((ref.isOnlyMoving())); ref.unmarkIsChainedItem(); ASSERT_FALSE(ref.isChainedItem()); - ASSERT_TRUE((ref.isOnlyExclusive())); + ASSERT_TRUE((ref.isOnlyMoving())); +} + +void RefCountTest::testMarkForEvictionAndMoving() { + { + // cannot mark for eviction when not accessible or not in MMContainer + RefcountWithFlags ref; + ASSERT_FALSE(ref.markForEviction()); + + ref.markInMMContainer(); + ASSERT_FALSE(ref.markForEviction()); + ref.unmarkInMMContainer(); + + ref.markAccessible(); + ASSERT_FALSE(ref.markForEviction()); + } + + { + // can mark for eviction when accessible and in MMContainer + // and unmarkForEviction return value contains admin bits + RefcountWithFlags ref; + ref.markInMMContainer(); + ref.markAccessible(); + ASSERT_TRUE(ref.markForEviction()); + ASSERT_TRUE(ref.unmarkForEviction() > 0); + } + + { + // cannot mark for eviction when moving + RefcountWithFlags ref; + ref.markInMMContainer(); + ref.markAccessible(); + + ASSERT_TRUE(ref.markMoving()); + ASSERT_FALSE(ref.markForEviction()); + + ref.unmarkInMMContainer(); + ref.unmarkAccessible(); + auto ret = ref.unmarkMoving(); + ASSERT_EQ(ret, 0); + } + + { + // cannot mark moving when marked for eviction + RefcountWithFlags ref; + ref.markInMMContainer(); + ref.markAccessible(); + + ASSERT_TRUE(ref.markForEviction()); + ASSERT_FALSE(ref.markMoving()); + + ref.unmarkInMMContainer(); + ref.unmarkAccessible(); + auto ret = ref.unmarkForEviction(); + ASSERT_EQ(ret, 0); + } + + { + // can mark moving when ref count > 0 + RefcountWithFlags ref; + ref.markInMMContainer(); + ref.markAccessible(); + + ref.incRef(); + + ASSERT_TRUE(ref.markMoving()); + + ref.unmarkInMMContainer(); + ref.unmarkAccessible(); + auto ret = ref.unmarkMoving(); + ASSERT_EQ(ret, 1); + } + + { + // cannot mark for eviction when ref count > 0 + RefcountWithFlags ref; + ref.markInMMContainer(); + ref.markAccessible(); + + ref.incRef(); + ASSERT_FALSE(ref.markForEviction()); + } } } // namespace TEST_F(RefCountTest, MutliThreaded) { testMultiThreaded(); } TEST_F(RefCountTest, Basic) { testBasic(); } +TEST_F(RefCountTest, MarkForEvictionAndMoving) { + testMarkForEvictionAndMoving(); +} } // namespace tests } // namespace cachelib } // namespace facebook diff --git a/cachelib/benchmarks/MMTypeBench.h b/cachelib/benchmarks/MMTypeBench.h index 52700d650c..6382da642b 100644 --- a/cachelib/benchmarks/MMTypeBench.h +++ b/cachelib/benchmarks/MMTypeBench.h @@ -203,10 +203,11 @@ void MMTypeBench::benchRemoveIterator(unsigned int numNodes) { // // no need of iter++ since remove will do that. 
   for (unsigned int deleted = 0; deleted < numNodes; deleted++) {
-    auto iter = c->getEvictionIterator();
-    if (iter) {
-      c->remove(iter);
-    }
+    c->withEvictionIterator([this](auto&& iter) {
+      if (iter) {
+        c->remove(iter);
+      }
+    });
   }
 }
diff --git a/cachelib/cachebench/cache/Cache.h b/cachelib/cachebench/cache/Cache.h
index c107b269e5..6a8f0b3342 100644
--- a/cachelib/cachebench/cache/Cache.h
+++ b/cachelib/cachebench/cache/Cache.h
@@ -466,7 +466,9 @@ inline typename LruAllocator::MMConfig makeMMConfig(CacheConfig const& config) {
                                 config.lruUpdateOnWrite,
                                 config.lruUpdateOnRead,
                                 config.tryLockUpdate,
-                                static_cast<uint8_t>(config.lruIpSpec));
+                                static_cast<uint8_t>(config.lruIpSpec),
+                                0,
+                                config.useCombinedLockForIterators);
 }
 
 // LRU
@@ -480,7 +482,9 @@ inline typename Lru2QAllocator::MMConfig makeMMConfig(
                                  config.tryLockUpdate,
                                  false,
                                  config.lru2qHotPct,
-                                 config.lru2qColdPct);
+                                 config.lru2qColdPct,
+                                 0,
+                                 config.useCombinedLockForIterators);
 }
 
 } // namespace cachebench
diff --git a/cachelib/cachebench/runner/CacheStressor.h b/cachelib/cachebench/runner/CacheStressor.h
index 492cb3ad87..c4f0e77590 100644
--- a/cachelib/cachebench/runner/CacheStressor.h
+++ b/cachelib/cachebench/runner/CacheStressor.h
@@ -289,16 +289,6 @@ class CacheStressor : public Stressor {
     try {
       // at the end of every operation, throttle per the config.
       SCOPE_EXIT { throttleFn(); };
-      // detect refcount leaks when run in debug mode.
-#ifndef NDEBUG
-      auto checkCnt = [](int cnt) {
-        if (cnt != 0) {
-          throw std::runtime_error(folly::sformat("Refcount leak {}", cnt));
-        }
-      };
-      checkCnt(cache_->getHandleCountForThread());
-      SCOPE_EXIT { checkCnt(cache_->getHandleCountForThread()); };
-#endif
       ++stats.ops;
 
       const auto pid = static_cast<PoolId>(opPoolDist(gen));
diff --git a/cachelib/cachebench/util/CacheConfig.cpp b/cachelib/cachebench/util/CacheConfig.cpp
index 50297955d2..1d464b4847 100644
--- a/cachelib/cachebench/util/CacheConfig.cpp
+++ b/cachelib/cachebench/util/CacheConfig.cpp
@@ -43,6 +43,7 @@ CacheConfig::CacheConfig(const folly::dynamic& configJson) {
   JSONSetVal(configJson, lruUpdateOnRead);
   JSONSetVal(configJson, tryLockUpdate);
   JSONSetVal(configJson, lruIpSpec);
+  JSONSetVal(configJson, useCombinedLockForIterators);
 
   JSONSetVal(configJson, lru2qHotPct);
   JSONSetVal(configJson, lru2qColdPct);
diff --git a/cachelib/cachebench/util/CacheConfig.h b/cachelib/cachebench/util/CacheConfig.h
index ea863407cf..1e1e0b1448 100644
--- a/cachelib/cachebench/util/CacheConfig.h
+++ b/cachelib/cachebench/util/CacheConfig.h
@@ -72,6 +72,7 @@ struct CacheConfig : public JSONConfig {
   bool lruUpdateOnWrite{false};
   bool lruUpdateOnRead{true};
   bool tryLockUpdate{false};
+  bool useCombinedLockForIterators{false};
 
   // LRU param
   uint64_t lruIpSpec{0};
diff --git a/website/docs/Cache_Library_Architecture_Guide/RAM_cache_indexing_and_eviction.md b/website/docs/Cache_Library_Architecture_Guide/RAM_cache_indexing_and_eviction.md
index e23217e36c..363612cf17 100644
--- a/website/docs/Cache_Library_Architecture_Guide/RAM_cache_indexing_and_eviction.md
+++ b/website/docs/Cache_Library_Architecture_Guide/RAM_cache_indexing_and_eviction.md
@@ -109,11 +109,16 @@ It has the following major API functions:
   is removed from the cache (evicted or explicitly removed by the client)
   (`bool CacheAllocator::removeFromMMContainer(Item& item)` in
   `cachelib/allocator/CacheAllocator-inl.h`).
-* `getEvictionIterator`: Return an iterator of items to be evicted. This is
-  called when the cache allocator is looking for eviction. Usually the first item
-  that can be evicted (no active handles, not moving, etc) is used (see
+* `withEvictionIterator`: Executes a callback with the eviction iterator
+  passed in as a parameter. This is called when the cache allocator is
+  looking for an eviction candidate. Usually the first item that can be
+  evicted (no active handles, not moving, etc.) is used (see
   `CacheAllocator::findEviction(PoolId pid, ClassId cid)` in
   `cachelib/allocator/CacheAllocator-inl.h`).
+* Compared to `getEvictionIterator`, `withEvictionIterator` offers the
+  possibility of combined locking. Combined locking can be turned on by
+  setting the `useCombinedLockForIterators` config option.
 
 The full API can be found in `struct Container` in
 `cachelib/allocator/MMLru.h`. This links to MMLru, which is one of the