Skip to content

Commit

Permalink
Change the way punch hole gc is scheduled
Browse files Browse the repository at this point in the history
Signed-off-by: v01dstar <[email protected]>
  • Loading branch information
v01dstar committed May 9, 2024
1 parent 75824b3 commit 7ba8634
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 12 deletions.
4 changes: 4 additions & 0 deletions src/blob_gc_picker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ std::unique_ptr<BlobGC> BasicBlobGCPicker::PickBlobGC(BlobStorage* blob_storage,

if (allow_punch_hole) {
for (auto& score : blob_storage->punch_hole_score()) {
if (info_logger_ != nullptr) {
TITAN_LOG_INFO(info_logger_, "Punch hole score %" PRIu64 " %.2f",
score.file_number, score.score);
}
if (score.score >= cf_options_.blob_file_discardable_ratio) {
break;
}
Expand Down
1 change: 1 addition & 0 deletions src/blob_gc_picker.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ class BasicBlobGCPicker final : public BlobGCPicker {
TitanCFOptions cf_options_;
uint32_t cf_id_;
TitanStats* stats_;
std::shared_ptr<Logger> info_logger_;

// Check whether blob_file needs GC. Returning true means this file should be
// picked for GC.
Expand Down
26 changes: 26 additions & 0 deletions src/db_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -841,6 +841,24 @@ void TitanDBImpl::ReleaseSnapshot(const Snapshot* snapshot) {
// TODO:
// We can record here whether the oldest snapshot is released.
// If not, we can just skip the next round of purging obsolete files.
{
MutexLock l(&mutex_);
if (scheduled_punch_hole_gc_ != nullptr && !punch_hole_gc_running_ &&
scheduled_punch_hole_gc_->snapshot()->GetSequenceNumber() ==
GetOldestSnapshotSequence() &&
bg_gc_scheduled_ < db_options_.max_background_gc) {
if (db_options_.disable_background_gc) return;

if (!initialized_.load(std::memory_order_acquire)) return;

if (shuting_down_.load(std::memory_order_acquire)) return;

TITAN_LOG_INFO(db_options_.info_log,
"Titan schedule punch hole GC after releasing snapshot");
bg_gc_scheduled_++;
thread_pool_->SubmitJob(std::bind(&TitanDBImpl::BGWorkGC, this));
}
}
db_->ReleaseSnapshot(snapshot);
}

Expand Down Expand Up @@ -1421,6 +1439,14 @@ void TitanDBImpl::OnCompactionCompleted(
compaction_job_info.job_id, blob_file_size_diff.size(),
hole_punchable_blocks_diff.size());
assert(hole_punchable_blocks_diff.size() == blob_file_size_diff.size());
std::string debug;
for (const auto& file_diff : hole_punchable_blocks_diff) {
debug += "[" + std::to_string(file_diff.first) + ":" +
std::to_string(file_diff.second) + "]";
}
TITAN_LOG_INFO(db_options_.info_log,
"OnCompactionCompleted[%d]: hole_punchable_blocks_diff=%s",
compaction_job_info.job_id, debug.c_str());
} else {
TITAN_LOG_INFO(db_options_.info_log,
"OnCompactionCompleted[%d]: blob_file_size_diff.size=%zu",
Expand Down
34 changes: 22 additions & 12 deletions src/db_impl_gc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -214,8 +214,7 @@ void TitanDBImpl::MaybeScheduleGC() {

if (shuting_down_.load(std::memory_order_acquire)) return;

while ((!gc_queue_.empty() ||
(scheduled_punch_hole_gc_ != nullptr && !punch_hole_gc_running_)) &&
while (!gc_queue_.empty() &&
bg_gc_scheduled_ < db_options_.max_background_gc) {
TITAN_LOG_INFO(db_options_.info_log, "Titan schedule GC");
bg_gc_scheduled_++;
Expand All @@ -228,10 +227,11 @@ void TitanDBImpl::BGWorkGC(void* db) {
}

void TitanDBImpl::BackgroundCallGC() {
TITAN_LOG_INFO(
db_options_.info_log,
"Titan background GC thread start, is punch hole gc running %d",
punch_hole_gc_running_);
TITAN_LOG_INFO(db_options_.info_log,
"Titan background GC thread start, is punch hole gc running "
"%d, has punch hole gc scheduled %s",
punch_hole_gc_running_,
scheduled_punch_hole_gc_ != nullptr ? "true" : "false");
TEST_SYNC_POINT("TitanDBImpl::BackgroundCallGC:BeforeGCRunning");
{
MutexLock l(&mutex_);
Expand All @@ -242,6 +242,7 @@ void TitanDBImpl::BackgroundCallGC() {
bg_gc_running_++;

TEST_SYNC_POINT("TitanDBImpl::BackgroundCallGC:BeforeBackgroundGC");
bool run_punch_hole_gc = false;
if (scheduled_punch_hole_gc_ != nullptr && !punch_hole_gc_running_) {
if (blob_file_set_->IsColumnFamilyObsolete(
scheduled_punch_hole_gc_->cf_id())) {
Expand All @@ -256,7 +257,7 @@ void TitanDBImpl::BackgroundCallGC() {
TEST_SYNC_POINT(
"TitanDBImpl::BackgroundCallGC:BeforeRunScheduledPunchHoleGC");
TITAN_LOG_INFO(db_options_.info_log,
"Titan start scheduled punch hole GC directly");
"Titan start scheduled punch hole GC");
std::unique_ptr<BlobGC> blob_gc = std::move(scheduled_punch_hole_gc_);
auto cfh = db_impl_->GetColumnFamilyHandleUnlocked(blob_gc->cf_id());
blob_gc->SetColumnFamily(cfh.get());
Expand All @@ -265,6 +266,7 @@ void TitanDBImpl::BackgroundCallGC() {
db_options_.info_log.get());
BackgroundGC(&log_buffer, blob_gc.get());
punch_hole_gc_running_ = false;
run_punch_hole_gc = true;
TEST_SYNC_POINT(
"TitanDBImpl::BackgroundCallGC:AfterRunScheduledPunchHoleGC");
{
Expand All @@ -273,8 +275,13 @@ void TitanDBImpl::BackgroundCallGC() {
LogFlush(db_options_.info_log.get());
mutex_.Lock();
}
} else {
TITAN_LOG_INFO(db_options_.info_log,
"Titan skip scheduled punch hole GC due to not holding "
"the oldest snapshot");
}
} else if (!gc_queue_.empty()) {
}
if (!run_punch_hole_gc && !gc_queue_.empty()) {
// If no scheduled punch hole gc was run in this round (either none was
// eligible or it was skipped), fall back to normal gc.
uint32_t cf_id;
bool found_non_obsolete_cf = false;
Expand All @@ -301,8 +308,9 @@ void TitanDBImpl::BackgroundCallGC() {
cf_id, stats_.get());
TITAN_LOG_INFO(db_options_.info_log,
"Titan picking candidate files for GC");
auto blob_gc = blob_gc_picker->PickBlobGC(blob_storage.get(),
!punch_hole_gc_running_);
auto blob_gc = blob_gc_picker->PickBlobGC(
blob_storage.get(),
!punch_hole_gc_running_ && scheduled_punch_hole_gc_ == nullptr);
if (blob_gc != nullptr) {
assert(!blob_gc->use_punch_hole() || !punch_hole_gc_running_);
if (blob_gc->use_punch_hole()) {
Expand All @@ -314,8 +322,8 @@ void TitanDBImpl::BackgroundCallGC() {
if (blob_gc->use_punch_hole() &&
blob_gc->snapshot()->GetSequenceNumber() >
GetOldestSnapshotSequence()) {
TITAN_LOG_INFO(db_options_.info_log,
"Titan schedule punch hole GC");
TITAN_LOG_INFO(db_options_.info_log, "Titan queue punch hole GC");
assert(scheduled_punch_hole_gc_ == nullptr);
scheduled_punch_hole_gc_ = std::move(blob_gc);
} else {
if (blob_gc->use_punch_hole()) {
Expand All @@ -337,6 +345,8 @@ void TitanDBImpl::BackgroundCallGC() {
mutex_.Lock();
}
}
} else {
TITAN_LOG_INFO(db_options_.info_log, "Titan GC nothing to do");
}
}
}
Expand Down

0 comments on commit 7ba8634

Please sign in to comment.