Skip to content

Commit 0f634c3

Browse files
branch-3.0: [opt](cloud) Set thread name for all background thread workers to improve observability #49366 (#50557)
Cherry-picked from #49366 Co-authored-by: Gavin Chou <[email protected]>
1 parent 670ee45 commit 0f634c3

File tree

9 files changed

+27
-10
lines changed

9 files changed

+27
-10
lines changed

cloud/src/common/metric.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,7 @@ int FdbMetricExporter::start() {
259259
[this]() { return !running_.load(std::memory_order_acquire); });
260260
}
261261
});
262+
pthread_setname_np(thread_->native_handle(), "fdb_metrics_exporter");
262263
return 0;
263264
}
264265

cloud/src/common/simple_thread_pool.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,14 @@
1717

1818
#pragma once
1919

20+
#include <pthread.h>
21+
2022
#include <atomic>
2123
#include <condition_variable>
2224
#include <iostream>
2325
#include <memory>
2426
#include <mutex>
27+
#include <string>
2528
#include <thread>
2629
#include <vector>
2730

@@ -38,9 +41,11 @@ class SimpleThreadPool {
3841
std::vector<std::thread> _worker_thread_group; // multi thread pool
3942
std::atomic<bool> _is_running;
4043
size_t _pool_size;
44+
std::string _pool_name;
4145

4246
public:
43-
SimpleThreadPool(size_t size) : _is_running(false), _pool_size(size) {
47+
SimpleThreadPool(size_t size, const std::string& name = "")
48+
: _is_running(false), _pool_size(size), _pool_name(name) {
4449
_job_queue = std::make_shared<SimpleSyncQueue<JobType>>(_pool_size * 2);
4550
}
4651

@@ -86,8 +91,11 @@ class SimpleThreadPool {
8691
int start() {
8792
_is_running = true;
8893
_worker_thread_group.clear();
94+
_pool_name = _pool_name.empty() ? "simple_thread_pool" : _pool_name;
8995
for (size_t i = 0; i < _pool_size; ++i) {
9096
_worker_thread_group.emplace_back(&SimpleThreadPool::work, this);
97+
std::string name = _pool_name + "_" + std::to_string(i);
98+
pthread_setname_np(_worker_thread_group.back().native_handle(), name.c_str());
9199
}
92100
return 0;
93101
}

cloud/src/main.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,7 @@ int main(int argc, char** argv) {
310310
}
311311
};
312312
periodiccally_log_thread = std::thread {periodiccally_log};
313+
pthread_setname_np(periodiccally_log_thread.native_handle(), "recycler_periodically_log");
313314
}
314315
// start service
315316
brpc::ServerOptions options;

cloud/src/meta-service/meta_server.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,7 @@ MetaServerRegister::MetaServerRegister(std::shared_ptr<TxnKv> txn_kv)
176176
}
177177
LOG(INFO) << "register thread quits";
178178
}));
179+
pthread_setname_np(register_thread_->native_handle(), "ms_register_thread");
179180
}
180181

181182
MetaServerRegister::~MetaServerRegister() {

cloud/src/meta-service/txn_kv.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,7 @@ int Network::init() {
259259
bool expected = true;
260260
Network::working.compare_exchange_strong(expected, false);
261261
});
262+
pthread_setname_np(network_thread_->native_handle(), "fdb_network_thread");
262263

263264
return 0;
264265
}

cloud/src/meta-service/txn_lazy_committer.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -499,7 +499,8 @@ std::pair<MetaServiceCode, std::string> TxnLazyCommitTask::wait() {
499499
}
500500

501501
TxnLazyCommitter::TxnLazyCommitter(std::shared_ptr<TxnKv> txn_kv) : txn_kv_(txn_kv) {
502-
worker_pool_ = std::make_unique<SimpleThreadPool>(config::txn_lazy_commit_num_threads);
502+
worker_pool_ = std::make_unique<SimpleThreadPool>(config::txn_lazy_commit_num_threads,
503+
"txn_lazy_commiter");
503504
worker_pool_->start();
504505
}
505506

@@ -541,4 +542,4 @@ void TxnLazyCommitter::remove(int64_t txn_id) {
541542
running_tasks_.erase(txn_id);
542543
}
543544

544-
} // namespace doris::cloud
545+
} // namespace doris::cloud

cloud/src/recycler/recycler.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -174,12 +174,14 @@ static inline void check_recycle_task(const std::string& instance_id, const std:
174174
Recycler::Recycler(std::shared_ptr<TxnKv> txn_kv) : txn_kv_(std::move(txn_kv)) {
175175
ip_port_ = std::string(butil::my_ip_cstr()) + ":" + std::to_string(config::brpc_listen_port);
176176

177-
auto s3_producer_pool = std::make_shared<SimpleThreadPool>(config::recycle_pool_parallelism);
177+
auto s3_producer_pool = std::make_shared<SimpleThreadPool>(config::recycle_pool_parallelism,
178+
"s3_producer_pool");
178179
s3_producer_pool->start();
179-
auto recycle_tablet_pool = std::make_shared<SimpleThreadPool>(config::recycle_pool_parallelism);
180+
auto recycle_tablet_pool = std::make_shared<SimpleThreadPool>(config::recycle_pool_parallelism,
181+
"recycle_tablet_pool");
180182
recycle_tablet_pool->start();
181-
auto group_recycle_function_pool =
182-
std::make_shared<SimpleThreadPool>(config::recycle_pool_parallelism);
183+
auto group_recycle_function_pool = std::make_shared<SimpleThreadPool>(
184+
config::recycle_pool_parallelism, "group_recycle_function_pool");
183185
group_recycle_function_pool->start();
184186
_thread_pool_group =
185187
RecyclerThreadPoolGroup(std::move(s3_producer_pool), std::move(recycle_tablet_pool),
@@ -1914,8 +1916,8 @@ int InstanceRecycler::recycle_rowsets() {
19141916
// Store keys of rowset recycled by background workers
19151917
std::mutex async_recycled_rowset_keys_mutex;
19161918
std::vector<std::string> async_recycled_rowset_keys;
1917-
auto worker_pool =
1918-
std::make_unique<SimpleThreadPool>(config::instance_recycler_worker_pool_size);
1919+
auto worker_pool = std::make_unique<SimpleThreadPool>(
1920+
config::instance_recycler_worker_pool_size, "recycle_rowsets");
19191921
worker_pool->start();
19201922
auto delete_rowset_data_by_prefix = [&](std::string key, const std::string& resource_id,
19211923
int64_t tablet_id, const std::string& rowset_id) {

cloud/src/recycler/recycler_service.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,7 @@ void recycle_copy_jobs(const std::shared_ptr<TxnKv>& txn_kv, const std::string&
208208
std::lock_guard lock(s_worker_mtx);
209209
s_worker.erase(instance_id);
210210
});
211+
pthread_setname_np(worker.native_handle(), "recycler_worker");
211212
worker.detach();
212213
}
213214

cloud/src/recycler/s3_accessor.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,8 @@ int S3Accessor::init() {
240240
static std::once_flag log_annotated_tags_key_once;
241241
std::call_once(log_annotated_tags_key_once, [&]() {
242242
LOG_INFO("start s3 accessor parallel worker pool");
243-
worker_pool = std::make_shared<SimpleThreadPool>(config::recycle_pool_parallelism);
243+
worker_pool =
244+
std::make_shared<SimpleThreadPool>(config::recycle_pool_parallelism, "s3_accessor");
244245
worker_pool->start();
245246
});
246247
switch (conf_.provider) {

0 commit comments

Comments
 (0)