From 4f7a7d1f63663ba2d2042b52c6a16976f0edd571 Mon Sep 17 00:00:00 2001 From: v01dstar Date: Thu, 9 May 2024 05:35:15 +0800 Subject: [PATCH] Change the way punch hole gc is scheduled Signed-off-by: v01dstar --- src/blob_gc_picker.cc | 4 ++++ src/blob_gc_picker.h | 1 + src/db_impl.cc | 26 ++++++++++++++++++++++++++ src/db_impl_gc.cc | 28 ++++++++++++++++++---------- 4 files changed, 49 insertions(+), 10 deletions(-) diff --git a/src/blob_gc_picker.cc b/src/blob_gc_picker.cc index 1c08483f8..00615911f 100644 --- a/src/blob_gc_picker.cc +++ b/src/blob_gc_picker.cc @@ -35,6 +35,10 @@ std::unique_ptr BasicBlobGCPicker::PickBlobGC(BlobStorage* blob_storage, if (allow_punch_hole) { for (auto& score : blob_storage->punch_hole_score()) { + if (info_logger_ != nullptr) { + TITAN_LOG_INFO(info_logger_, "Punch hole score %" PRIu64 " %.2f", + score.file_number, score.score); + } if (score.score >= cf_options_.blob_file_discardable_ratio) { break; } diff --git a/src/blob_gc_picker.h b/src/blob_gc_picker.h index c0e4d379e..a6b7d183b 100644 --- a/src/blob_gc_picker.h +++ b/src/blob_gc_picker.h @@ -41,6 +41,7 @@ class BasicBlobGCPicker final : public BlobGCPicker { TitanCFOptions cf_options_; uint32_t cf_id_; TitanStats* stats_; + std::shared_ptr info_logger_; // Check if blob_file needs to gc, return true means we need pick this // file for gc diff --git a/src/db_impl.cc b/src/db_impl.cc index cd8227d01..56cafeb7a 100644 --- a/src/db_impl.cc +++ b/src/db_impl.cc @@ -841,6 +841,24 @@ void TitanDBImpl::ReleaseSnapshot(const Snapshot* snapshot) { // TODO: // We can record here whether the oldest snapshot is released. // If not, we can just skip the next round of purging obsolete files. + { + MutexLock l(&mutex_); + if (scheduled_punch_hole_gc_ != nullptr && !punch_hole_gc_running_ && + scheduled_punch_hole_gc_->snapshot()->GetSequenceNumber() == + GetOldestSnapshotSequence() && + bg_gc_scheduled_ < db_options_.max_background_gc) { + if (db_options_.disable_background_gc) return; + + if (!initialized_.load(std::memory_order_acquire)) return; + + if (shuting_down_.load(std::memory_order_acquire)) return; + + TITAN_LOG_INFO(db_options_.info_log, + "Titan schedule punch hole GC after releasing snapshot"); + bg_gc_scheduled_++; + thread_pool_->SubmitJob(std::bind(&TitanDBImpl::BGWorkGC, this)); + } + } db_->ReleaseSnapshot(snapshot); } @@ -1421,6 +1439,14 @@ void TitanDBImpl::OnCompactionCompleted( compaction_job_info.job_id, blob_file_size_diff.size(), hole_punchable_blocks_diff.size()); assert(hole_punchable_blocks_diff.size() == blob_file_size_diff.size()); + std::string debug; + for (const auto& file_diff : hole_punchable_blocks_diff) { + debug += "[" + std::to_string(file_diff.first) + ":" + + std::to_string(file_diff.second) + "]"; + } + TITAN_LOG_INFO(db_options_.info_log, + "OnCompactionCompleted[%d]: hole_punchable_blocks_diff=%s", + compaction_job_info.job_id, debug.c_str()); } else { TITAN_LOG_INFO(db_options_.info_log, "OnCompactionCompleted[%d]: blob_file_size_diff.size=%zu", diff --git a/src/db_impl_gc.cc b/src/db_impl_gc.cc index c6f3ad151..7db193881 100644 --- a/src/db_impl_gc.cc +++ b/src/db_impl_gc.cc @@ -214,8 +214,7 @@ void TitanDBImpl::MaybeScheduleGC() { if (shuting_down_.load(std::memory_order_acquire)) return; - while ((!gc_queue_.empty() || - (scheduled_punch_hole_gc_ != nullptr && !punch_hole_gc_running_)) && + while (!gc_queue_.empty() && bg_gc_scheduled_ < db_options_.max_background_gc) { TITAN_LOG_INFO(db_options_.info_log, "Titan schedule GC"); bg_gc_scheduled_++; @@ -228,10 +227,11 @@ void TitanDBImpl::BGWorkGC(void* db) { } void TitanDBImpl::BackgroundCallGC() { - TITAN_LOG_INFO( - db_options_.info_log, - "Titan background GC thread start, is punch hole gc running %d", - punch_hole_gc_running_); + TITAN_LOG_INFO(db_options_.info_log, + "Titan background GC thread start, is punch hole gc running " + "%d, has punch hole gc scheduled %s", + punch_hole_gc_running_, + scheduled_punch_hole_gc_ != nullptr ? "true" : "false"); TEST_SYNC_POINT("TitanDBImpl::BackgroundCallGC:BeforeGCRunning"); { MutexLock l(&mutex_); @@ -242,6 +242,7 @@ void TitanDBImpl::BackgroundCallGC() { bg_gc_running_++; TEST_SYNC_POINT("TitanDBImpl::BackgroundCallGC:BeforeBackgroundGC"); + bool run_punch_hole_gc = false; if (scheduled_punch_hole_gc_ != nullptr && !punch_hole_gc_running_) { if (blob_file_set_->IsColumnFamilyObsolete( scheduled_punch_hole_gc_->cf_id())) { @@ -256,7 +257,7 @@ void TitanDBImpl::BackgroundCallGC() { TEST_SYNC_POINT( "TitanDBImpl::BackgroundCallGC:BeforeRunScheduledPunchHoleGC"); TITAN_LOG_INFO(db_options_.info_log, - "Titan start scheduled punch hole GC directly"); + "Titan start scheduled punch hole GC"); std::unique_ptr blob_gc = std::move(scheduled_punch_hole_gc_); auto cfh = db_impl_->GetColumnFamilyHandleUnlocked(blob_gc->cf_id()); blob_gc->SetColumnFamily(cfh.get()); @@ -265,6 +266,7 @@ void TitanDBImpl::BackgroundCallGC() { db_options_.info_log.get()); BackgroundGC(&log_buffer, blob_gc.get()); punch_hole_gc_running_ = false; + run_punch_hole_gc = true; TEST_SYNC_POINT( "TitanDBImpl::BackgroundCallGC:AfterRunScheduledPunchHoleGC"); { @@ -273,8 +275,13 @@ void TitanDBImpl::BackgroundCallGC() { LogFlush(db_options_.info_log.get()); mutex_.Lock(); } + } else { + TITAN_LOG_INFO(db_options_.info_log, + "Titan skip scheduled punch hole GC due to not holding " + "the oldest snapshot"); } - } else if (!gc_queue_.empty()) { + } + if (!run_punch_hole_gc && !gc_queue_.empty()) { // If there is no scheduled punch hole gc, do normal gc. uint32_t cf_id; bool found_non_obsolete_cf = false; @@ -314,8 +321,7 @@ void TitanDBImpl::BackgroundCallGC() { if (blob_gc->use_punch_hole() && blob_gc->snapshot()->GetSequenceNumber() > GetOldestSnapshotSequence()) { - TITAN_LOG_INFO(db_options_.info_log, - "Titan schedule punch hole GC"); + TITAN_LOG_INFO(db_options_.info_log, "Titan queue punch hole GC"); scheduled_punch_hole_gc_ = std::move(blob_gc); } else { if (blob_gc->use_punch_hole()) { @@ -337,6 +343,8 @@ void TitanDBImpl::BackgroundCallGC() { mutex_.Lock(); } } + } else { + TITAN_LOG_INFO(db_options_.info_log, "Titan GC nothing to do"); } } }