Skip to content

Commit c19691b

Browse files
decster wanpengfei-git
authored and committed
[Enhancement] Make pk table compaction's rowset size threshold large and configurable (#23843)
If the size of a PK tablet's rowset is greater than 32M (currently a fixed value) and there are no deletes on the rowset, that rowset won't be selected for compaction. For tables with very large rows (some columns are very large while most columns are small), a rowset may contain very few rows (e.g. 32M bytes, each row 16K, so only 2000 rows per file), which is inefficient for scans of small columns. This PR makes the rowset selection threshold configurable by adding a new config `update_compaction_size_threshold` (default now 256M vs. the original 32M); rowsets whose size is below this threshold will be selected for compaction. Note that this PR changes compaction behavior: after this PR, some previously uncompacted rowsets will be compacted, which may increase compaction load. (cherry picked from commit 89d4c37) Signed-off-by: Binglin Chang <[email protected]>
1 parent bad1100 commit c19691b

File tree

3 files changed

+9
-9
lines changed

3 files changed

+9
-9
lines changed

be/src/common/config.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -300,6 +300,7 @@ CONF_mInt32(update_compaction_check_interval_seconds, "60");
300300
CONF_mInt32(update_compaction_num_threads_per_disk, "1");
301301
CONF_Int32(update_compaction_per_tablet_min_interval_seconds, "120"); // 2min
302302
CONF_mInt64(max_update_compaction_num_singleton_deltas, "1000");
303+
CONF_mInt64(update_compaction_size_threshold, "268435456");
303304

304305
CONF_mInt32(repair_compaction_interval_seconds, "600"); // 10 min
305306
CONF_Int32(manual_compaction_threads, "4");

be/src/storage/tablet_updates.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1865,7 +1865,7 @@ int64_t TabletUpdates::get_compaction_score() {
18651865
}
18661866
rowsets = _edit_version_infos[_apply_version_idx]->rowsets;
18671867
}
1868-
int64_t total_score = -_compaction_cost_seek;
1868+
int64_t total_score = -config::update_compaction_size_threshold;
18691869
bool has_error = false;
18701870
{
18711871
std::lock_guard lg(_rowset_stats_lock);
@@ -1912,7 +1912,6 @@ static string int_list_to_string(const vector<uint32_t>& l) {
19121912
return ret;
19131913
}
19141914

1915-
static const size_t compaction_result_bytes_threashold = 1000000000;
19161915
static const size_t compaction_result_rows_threashold = 10000000;
19171916

19181917
Status TabletUpdates::compaction(MemTracker* mem_tracker) {
@@ -1942,7 +1941,7 @@ Status TabletUpdates::compaction(MemTracker* mem_tracker) {
19421941
size_t total_bytes = 0;
19431942
size_t total_rows_after_compaction = 0;
19441943
size_t total_bytes_after_compaction = 0;
1945-
int64_t total_score = -_compaction_cost_seek;
1944+
int64_t total_score = -config::update_compaction_size_threshold;
19461945
vector<CompactionEntry> candidates;
19471946
{
19481947
std::lock_guard lg(_rowset_stats_lock);
@@ -1980,7 +1979,7 @@ Status TabletUpdates::compaction(MemTracker* mem_tracker) {
19801979
size_t new_rows = total_rows_after_compaction + e.num_rows - e.num_dels;
19811980
size_t new_bytes = total_bytes_after_compaction + e.bytes * (e.num_rows - e.num_dels) / e.num_rows;
19821981
if (info->inputs.size() > 0 && (new_rows > compaction_result_rows_threashold * 3 / 2 ||
1983-
new_bytes > compaction_result_bytes_threashold * 3 / 2)) {
1982+
new_bytes > config::update_compaction_size_threshold * 3 / 2)) {
19841983
break;
19851984
}
19861985
info->inputs.push_back(e.rowsetid);
@@ -1989,7 +1988,7 @@ Status TabletUpdates::compaction(MemTracker* mem_tracker) {
19891988
total_bytes += e.bytes;
19901989
total_rows_after_compaction = new_rows;
19911990
total_bytes_after_compaction = new_bytes;
1992-
if (total_bytes_after_compaction > compaction_result_bytes_threashold ||
1991+
if (total_bytes_after_compaction > config::update_compaction_size_threshold ||
19931992
total_rows_after_compaction > compaction_result_rows_threashold ||
19941993
info->inputs.size() >= config::max_update_compaction_num_singleton_deltas) {
19951994
break;
@@ -2257,16 +2256,17 @@ void TabletUpdates::get_compaction_status(std::string* json_result) {
22572256
}
22582257

22592258
void TabletUpdates::_calc_compaction_score(RowsetStats* stats) {
2260-
if (stats->num_rows < 10) {
2261-
stats->compaction_score = _compaction_cost_seek;
2259+
if (stats->num_rows == 0) {
2260+
stats->compaction_score = config::update_compaction_size_threshold;
22622261
return;
22632262
}
22642263
// TODO(cbl): estimate read/write cost, currently just use fixed value
22652264
const int64_t cost_record_write = 1;
22662265
const int64_t cost_record_read = 4;
22672266
// use double to prevent overflow
22682267
auto delete_bytes = (int64_t)(stats->byte_size * (double)stats->num_dels / stats->num_rows);
2269-
stats->compaction_score = _compaction_cost_seek + (cost_record_read + cost_record_write) * delete_bytes -
2268+
stats->compaction_score = config::update_compaction_size_threshold +
2269+
(cost_record_read + cost_record_write) * delete_bytes -
22702270
cost_record_write * stats->byte_size;
22712271
}
22722272

be/src/storage/tablet_updates.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -429,7 +429,6 @@ class TabletUpdates {
429429
int64_t _last_compaction_time_ms = 0;
430430
std::atomic<int64_t> _last_compaction_success_millis{0};
431431
std::atomic<int64_t> _last_compaction_failure_millis{0};
432-
int64_t _compaction_cost_seek = 32 * 1024 * 1024; // 32MB
433432

434433
mutable std::mutex _rowset_stats_lock;
435434
// maintain current version(applied version) rowsets' stats

0 commit comments

Comments
 (0)