Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Using File System "Punch Hole" API for GC #313

Open
wants to merge 23 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions include/titan/db.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,8 @@ class TitanDB : public StackableDB {
// "rocksdb.titandb.discardable_ratio_le100_file_num" - returns count of
// file whose discardable ratio is less or equal to 100%.
static const std::string kNumDiscardableRatioLE100File;

static const std::string kNumHolePunchableBlocks;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please add the comment

};

bool GetProperty(ColumnFamilyHandle* column_family, const Slice& property,
Expand Down
12 changes: 11 additions & 1 deletion include/titan/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,13 @@ struct TitanCFOptions : public ColumnFamilyOptions {
// Default: false
bool skip_value_in_compaction_filter{false};

// If set to true, Titan will use hole punching to release the space occupied
// by unreferenced blobs. This feature is only available on Linux with file
// systems that support hole punching, such as ext4, xfs, and btrfs.
//
// Default: false
bool enable_punch_hole_gc{false};

TitanCFOptions() = default;
explicit TitanCFOptions(const ColumnFamilyOptions& options)
: ColumnFamilyOptions(options) {}
Expand Down Expand Up @@ -190,7 +197,8 @@ struct ImmutableTitanCFOptions {
min_gc_batch_size(opts.min_gc_batch_size),
merge_small_file_threshold(opts.merge_small_file_threshold),
level_merge(opts.level_merge),
skip_value_in_compaction_filter(opts.skip_value_in_compaction_filter) {}
skip_value_in_compaction_filter(opts.skip_value_in_compaction_filter),
enable_punch_hole_gc(opts.enable_punch_hole_gc) {}

uint64_t blob_file_target_size;

Expand All @@ -205,6 +213,8 @@ struct ImmutableTitanCFOptions {
bool level_merge;

bool skip_value_in_compaction_filter;

bool enable_punch_hole_gc;
};

struct MutableTitanCFOptions {
Expand Down
87 changes: 87 additions & 0 deletions src/blob_aligned_blocks_collector.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#include "blob_aligned_blocks_collector.h"

#include "base_db_listener.h"

namespace rocksdb {
namespace titandb {

// Builds a new, empty BlobAlignedBlocksCollector. The factory context is not
// consulted.
// NOTE(review): presumably the caller takes ownership of the returned raw
// pointer, as is usual for collector factories -- confirm against the base
// interface.
TablePropertiesCollector*
BlobAlignedBlocksCollectorFactory::CreateTablePropertiesCollector(
    rocksdb::TablePropertiesCollectorFactory::Context /* context */) {
  TablePropertiesCollector* collector = new BlobAlignedBlocksCollector();
  return collector;
}

// Key under which Finish() stores the encoded file-number -> aligned-block
// count map in the user-collected table properties.
const std::string BlobAlignedBlocksCollector::kPropertiesName =
"TitanDB.blob_aligned_blocks";

// Serializes `aligned_blocks` by appending to `result`: a varint32 entry
// count followed by one (varint64 file number, varint64 block count) pair per
// entry, in map order. Always returns true.
bool BlobAlignedBlocksCollector::Encode(
    const std::map<uint64_t, uint64_t>& aligned_blocks, std::string* result) {
  const uint32_t entry_count = static_cast<uint32_t>(aligned_blocks.size());
  PutVarint32(result, entry_count);
  for (auto it = aligned_blocks.begin(); it != aligned_blocks.end(); ++it) {
    PutVarint64(result, it->first);
    PutVarint64(result, it->second);
  }
  return true;
}
// Parses the layout produced by Encode() from `slice` and stores each
// (file number, block count) pair into `aligned_blocks`, overwriting any
// entry that already exists for the same file number.
//
// Returns false as soon as a varint cannot be read; entries decoded before
// the failure remain in `aligned_blocks`.
bool BlobAlignedBlocksCollector::Decode(
    Slice* slice, std::map<uint64_t, uint64_t>* aligned_blocks) {
  uint32_t remaining = 0;
  if (!GetVarint32(slice, &remaining)) {
    return false;
  }
  while (remaining > 0) {
    uint64_t file_number = 0;
    uint64_t blocks = 0;
    // Short-circuit keeps the read order: file number first, then count.
    if (!GetVarint64(slice, &file_number) || !GetVarint64(slice, &blocks)) {
      return false;
    }
    (*aligned_blocks)[file_number] = blocks;
    --remaining;
  }
  return true;
}

// Accumulates, per referenced blob file, the number of aligned blocks covered
// by the blob records whose indexes are added to this table.
//
// Entries whose type is not kEntryBlobIndex are ignored. If the blob index in
// `value` cannot be decoded, the decode error is returned and nothing is
// recorded for that entry.
Status BlobAlignedBlocksCollector::AddUserKey(const Slice& /* key */,
                                              const Slice& value,
                                              EntryType type,
                                              SequenceNumber /* seq */,
                                              uint64_t /* file_size */) {
  if (type != kEntryBlobIndex) {
    return Status::OK();
  }

  // DecodeFrom takes a mutable Slice*, so hand it a copy instead of the
  // caller's value.
  Slice input = value;
  BlobIndex index;
  auto s = index.DecodeFrom(&input);
  if (!s.ok()) {
    return s;
  }

  // NOTE(review): 4096 matches the alignment BlobFileBuilder selects when
  // punch hole GC is enabled -- revisit if the alignment becomes configurable.
  const uint64_t kBlockSize = 4096;
  const uint64_t record_size = index.blob_handle.size;
  // Round up to whole blocks. A record whose size is an exact multiple of the
  // block size must not be charged an extra block; this is the same rounding
  // BlobFileBuilder applies when it counts live blocks.
  const uint64_t blocks =
      record_size / kBlockSize + (record_size % kBlockSize != 0 ? 1 : 0);

  // operator[] value-initializes a missing entry to 0, so a single += covers
  // both the first and subsequent records of a file.
  aligned_blocks_[index.file_number] += blocks;

  return Status::OK();
}

// Publishes the collected per-file block counts into `properties` under
// kPropertiesName, encoded with Encode(). Nothing is written when no blob
// index was collected. Always returns OK.
Status BlobAlignedBlocksCollector::Finish(UserCollectedProperties* properties) {
  if (!aligned_blocks_.empty()) {
    std::string encoded;
    const bool encoded_ok = Encode(aligned_blocks_, &encoded);
    (void)encoded_ok;  // only inspected by the assert below
    assert(encoded_ok);
    assert(!encoded.empty());
    properties->emplace(kPropertiesName, encoded);
  }
  return Status::OK();
}

} // namespace titandb
} // namespace rocksdb
56 changes: 56 additions & 0 deletions src/blob_aligned_blocks_collector.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#pragma once

#include "rocksdb/listener.h"
#include "rocksdb/table_properties.h"
#include "util/coding.h"

#include "blob_file_set.h"
#include "db_impl.h"

// BlobAlignedBlocksCollector is a TablePropertiesCollector that collects
// the mapping from blob file number to the number of aligned blocks in that
// file. This information is used by punch hole GC. It is not the same as
// live_data_size: to use punch hole GC, blobs have to be aligned to the file
// system block size (so that the file is still parsable after holes are
// punched). The block count is therefore live_data_size plus the size of all
// the padding bytes, divided by the file system block size.

namespace rocksdb {
namespace titandb {
// Factory registered with the table builder so that every new table gets its
// own BlobAlignedBlocksCollector.
class BlobAlignedBlocksCollectorFactory final
    : public TablePropertiesCollectorFactory {
 public:
  // Name reported for this factory.
  const char* Name() const override { return "BlobAlignedBlocksCollector"; }

  // Returns a newly allocated collector; defined in the .cc file.
  TablePropertiesCollector* CreateTablePropertiesCollector(
      TablePropertiesCollectorFactory::Context context) override;

  // NOTE(review): not read or written by any code visible alongside this
  // class -- confirm whether it is still needed.
  std::shared_ptr<Logger> info_logger_;
};

// Table properties collector that tallies, for every blob file referenced by
// the table being built, how many aligned blocks the referenced blob records
// cover. The tally is stored as a user-collected property when the table is
// finished.
class BlobAlignedBlocksCollector final : public TablePropertiesCollector {
 public:
  // Property key under which the encoded tally is stored.
  const static std::string kPropertiesName;

  BlobAlignedBlocksCollector() = default;

  // Appends the serialized form of `aligned_blocks` to `result`.
  static bool Encode(const std::map<uint64_t, uint64_t>& aligned_blocks,
                     std::string* result);
  // Parses data written by Encode() from `slice` into `aligned_blocks`.
  // Returns false if the input cannot be parsed.
  static bool Decode(Slice* slice,
                     std::map<uint64_t, uint64_t>* aligned_blocks);

  const char* Name() const override { return "BlobAlignedBlocksCollector"; }

  // Called for each entry added to the table; only blob index entries
  // contribute to the tally.
  Status AddUserKey(const Slice& key, const Slice& value, EntryType type,
                    SequenceNumber seq, uint64_t file_size) override;

  // Writes the encoded tally into `properties` (no-op when the tally is
  // empty).
  Status Finish(UserCollectedProperties* properties) override;

  // No human-readable properties are exposed.
  UserCollectedProperties GetReadableProperties() const override {
    return UserCollectedProperties();
  }

 private:
  // Blob file number -> accumulated aligned block count.
  std::map<uint64_t, uint64_t> aligned_blocks_;
  // NOTE(review): never assigned or used by the visible code -- confirm
  // whether it is still needed.
  std::shared_ptr<Logger> info_logger_;
};

} // namespace titandb
} // namespace rocksdb
40 changes: 38 additions & 2 deletions src/blob_file_builder.cc
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#include "iostream"

#include "blob_file_builder.h"

#include "table/block_based/block_based_table_reader.h"
Expand Down Expand Up @@ -33,19 +35,24 @@ BlobFileBuilder::BlobFileBuilder(const TitanDBOptions& db_options,
return;
#endif
}
// alignment_size_ = cf_options_.alignment_size;
alignment_size_ = cf_options.enable_punch_hole_gc ? 4 * 1024 : 0;
WriteHeader();
}

void BlobFileBuilder::WriteHeader() {
BlobFileHeader header;
header.version = blob_file_version_;
if (cf_options_.blob_file_compression_options.max_dict_bytes > 0) {
assert(blob_file_version_ == BlobFileHeader::kVersion2);
assert(blob_file_version_ >= BlobFileHeader::kVersion2);
header.flags |= BlobFileHeader::kHasUncompressionDictionary;
}
std::string buffer;
header.EncodeTo(&buffer);
status_ = file_->Append(buffer);
if (alignment_size_ > 0) {
FillBlockWithPadding();
}
}

void BlobFileBuilder::Add(const BlobRecord& record,
Expand Down Expand Up @@ -143,11 +150,39 @@ void BlobFileBuilder::WriteEncoderData(BlobHandle* handle) {
handle->offset = file_->GetFileSize();
handle->size = encoder_.GetEncodedSize();
live_data_size_ += handle->size;
if (alignment_size_ > 0) {
live_blocks_ += handle->size / alignment_size_ +
(handle->size % alignment_size_ ? 1 : 0);
}

status_ = file_->Append(encoder_.GetHeader());
if (ok()) {
status_ = file_->Append(encoder_.GetRecord());
num_entries_++;
if (ok()) {
FillBlockWithPadding();
}
}
}

// Appends zero bytes until the current file size is a multiple of
// alignment_size_. Does nothing when alignment is disabled (alignment_size_
// is 0) or the file is already aligned. The result of the last append is
// stored in status_; padding stops at the first failed append.
void BlobFileBuilder::FillBlockWithPadding() {
  if (alignment_size_ == 0) {
    return;
  }
  const auto overhang = file_->GetFileSize() % alignment_size_;
  if (overhang == 0) {
    return;
  }

  size_t remaining = alignment_size_ - overhang;
  char zeros[4096] = {0};
  // Emit full scratch-buffer chunks while more than one buffer is still
  // owed, then finish with the tail.
  for (; remaining > sizeof(zeros); remaining -= sizeof(zeros)) {
    status_ = file_->Append(Slice(zeros, sizeof(zeros)));
    if (!ok()) {
      return;
    }
  }
  status_ = file_->Append(Slice(zeros, remaining));
}

Expand Down Expand Up @@ -190,13 +225,14 @@ Status BlobFileBuilder::Finish(OutContexts* out_ctx) {
BlobFileFooter footer;
// if has compression dictionary, encode it into meta blocks
if (cf_options_.blob_file_compression_options.max_dict_bytes > 0) {
assert(blob_file_version_ == BlobFileHeader::kVersion2);
assert(blob_file_version_ >= BlobFileHeader::kVersion2);
BlockHandle meta_index_handle;
MetaIndexBuilder meta_index_builder;
WriteCompressionDictBlock(&meta_index_builder);
WriteRawBlock(meta_index_builder.Finish(), &meta_index_handle);
footer.meta_index_handle = meta_index_handle;
}
footer.alignment_size = alignment_size_;

std::string buffer;
footer.EncodeTo(&buffer);
Expand Down
8 changes: 7 additions & 1 deletion src/blob_file_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ class BlobFileBuilder {
// caller to sync and close the file after calling Finish().
BlobFileBuilder(const TitanDBOptions& db_options,
const TitanCFOptions& cf_options, WritableFileWriter* file,
uint32_t blob_file_version = BlobFileHeader::kVersion2);
uint32_t blob_file_version = BlobFileHeader::kVersion3);

// Tries to add the record to the file
// Notice:
Expand Down Expand Up @@ -109,6 +109,8 @@ class BlobFileBuilder {
const std::string& GetLargestKey() { return largest_key_; }

uint64_t live_data_size() const { return live_data_size_; }
uint64_t live_blocks() const { return live_blocks_; }
uint64_t alignment_size() const { return alignment_size_; }

private:
BuilderState builder_state_;
Expand All @@ -123,6 +125,7 @@ class BlobFileBuilder {
void WriteCompressionDictBlock(MetaIndexBuilder* meta_index_builder);
void FlushSampleRecords(OutContexts* out_ctx);
void WriteEncoderData(BlobHandle* handle);
void FillBlockWithPadding();

TitanCFOptions cf_options_;
WritableFileWriter* file_;
Expand All @@ -142,6 +145,9 @@ class BlobFileBuilder {
std::string smallest_key_;
std::string largest_key_;
uint64_t live_data_size_ = 0;
uint64_t live_blocks_ = 0;

uint64_t alignment_size_ = 0;
};

} // namespace titandb
Expand Down
Loading