Skip to content

Commit f7ded03

Browse files
committed
Significantly refactored trace aggregator
Previously we were leaking ThreadTracer objects in the TraceAggregator as creating new threads means ThreadTracer gets pushed into TraceAggregator but it is never removed. This causes a memory leak and also makes the TraceAggregator slower. This refactors the entire code to make this work. Also removed the dependency of Thread on the App* pointer, which is unsafe and can cause segfaults in some extreme situations.
1 parent 2f1cb13 commit f7ded03

File tree

12 files changed

+226
-385
lines changed

12 files changed

+226
-385
lines changed

.clang-tidy

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ cert-*,
2121
-readability-identifier-length,
2222
-readability-isolate-declaration,
2323
-readability-magic-numbers,
24-
-readability-redundant-inline-specifier'
24+
-readability-redundant-inline-specifier,
25+
-readability-use-anyofallof'
2526
# TODO: Re-enable bugprone-exception-escape when no longer throwing
2627
# https://github.com/isocpp/CppCoreGuidelines/issues/1589
2728
WarningsAsErrors: '*'

examples/tracing_example_no_rt/main.cc

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
#include <cactus_rt/tracing.h>
22

3-
#include <list>
43
#include <memory>
54
#include <thread>
65

@@ -25,18 +24,15 @@ void StartTracing(const char* app_name, const char* filename) {
2524

2625
// Create the file sink so the data aggregated by the TraceAggregator will be written to somewhere.
2726
auto file_sink = std::make_shared<FileSink>(filename);
28-
trace_aggregator->RegisterSink(file_sink);
2927

3028
quill::start();
31-
trace_aggregator->Start();
29+
trace_aggregator->Start(file_sink);
3230
}
3331

3432
void StopTracing() {
3533
cactus_rt::tracing::DisableTracing();
3634

37-
trace_aggregator->RequestStop();
38-
trace_aggregator->Join();
39-
trace_aggregator = nullptr; // Destroy the trace aggregator and free all resources.
35+
trace_aggregator->Stop();
4036
}
4137

4238
int main() {

include/cactus_rt/app.h

Lines changed: 1 addition & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33

44
#include <gtest/gtest_prod.h>
55

6-
#include <list>
76
#include <memory>
87
#include <string>
98
#include <vector>
@@ -36,14 +35,7 @@ class App {
3635

3736
std::vector<std::shared_ptr<Thread>> threads_;
3837

39-
// We need to cache the list thread tracers here because the trace_aggregator
40-
// can be dynamically created and stopped. When a new trace aggregator is
41-
// created, it needs to know about all the thread tracers.
42-
//
43-
// TODO: investigate into a weak pointer.
44-
std::list<std::shared_ptr<tracing::ThreadTracer>> thread_tracers_;
45-
std::unique_ptr<tracing::TraceAggregator> trace_aggregator_ = nullptr;
46-
std::mutex aggregator_mutex_;
38+
std::shared_ptr<tracing::TraceAggregator> trace_aggregator_;
4739

4840
void SetDefaultLogFormat(quill::Config& cfg) {
4941
// Create a handler of stdout
@@ -117,11 +109,6 @@ class App {
117109
*/
118110
bool StartTraceSession(std::shared_ptr<tracing::Sink> sink) noexcept;
119111

120-
/**
121-
* @brief Register a custom trace sink after starting the trace session
122-
*/
123-
void RegisterTraceSink(std::shared_ptr<tracing::Sink> sink) noexcept;
124-
125112
/**
126113
* @brief Stops the tracing session for the process. Will be no-op if tracing
127114
* is not enabled. This function is not real-time safe.
@@ -148,18 +135,6 @@ class App {
148135
void StartQuill();
149136

150137
private:
151-
/**
152-
* @brief Register a thread tracer. Should only be called from Thread::RunThread.
153-
*/
154-
void RegisterThreadTracer(std::shared_ptr<tracing::ThreadTracer> thread_tracer) noexcept;
155-
156-
/**
157-
* @brief Remove a thread tracer. Should only be called from Thread::~Thread().
158-
*/
159-
void DeregisterThreadTracer(const std::shared_ptr<tracing::ThreadTracer>& thread_tracer) noexcept;
160-
161-
void CreateAndStartTraceAggregator(std::shared_ptr<tracing::Sink> sink) noexcept;
162-
163138
void StopTraceAggregator() noexcept;
164139
};
165140
} // namespace cactus_rt

include/cactus_rt/thread.h

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,28 +7,25 @@
77
#include <cstdint>
88
#include <memory>
99
#include <string>
10-
#include <vector>
1110

1211
#include "config.h"
1312
#include "quill/Quill.h"
1413
#include "tracing/thread_tracer.h"
14+
#include "tracing/trace_aggregator.h"
1515

1616
namespace cactus_rt {
1717

1818
/// @private
1919
constexpr size_t kDefaultStackSize = 8 * 1024 * 1024; // 8MB default stack space should be plenty
2020

21-
// Necessary forward declaration
22-
class App;
23-
2421
class Thread {
2522
ThreadConfig config_;
2623
std::string name_;
2724
std::vector<size_t> cpu_affinity_;
2825
size_t stack_size_;
2926

3027
quill::Logger* logger_;
31-
std::shared_ptr<tracing::ThreadTracer> tracer_;
28+
std::shared_ptr<tracing::ThreadTracer> tracer_ = nullptr;
3229

3330
std::atomic_bool stop_requested_ = false;
3431

@@ -41,12 +38,10 @@ class Thread {
4138
*/
4239
static void* RunThread(void* data);
4340

44-
friend class App;
45-
46-
// Non-owning App pointer. Used only for notifying that the thread has
47-
// started/stopped for tracing purposes. Set by Thread::Start and read at
41+
// Non-owning TraceAggregator pointer. Used only for notifying that the thread
42+
// has started/stopped for tracing purposes. Set by Thread::Start and read at
4843
// the beginning of Thread::RunThread.
49-
App* app_ = nullptr;
44+
std::weak_ptr<tracing::TraceAggregator> trace_aggregator_;
5045

5146
public:
5247
/**
@@ -60,8 +55,7 @@ class Thread {
6055
name_(name),
6156
cpu_affinity_(config_.cpu_affinity),
6257
stack_size_(static_cast<size_t>(PTHREAD_STACK_MIN) + config_.stack_size),
63-
logger_(quill::create_logger(name_)),
64-
tracer_(std::make_shared<tracing::ThreadTracer>(name, config_.tracer_config.queue_size)) {
58+
logger_(quill::create_logger(name_)) {
6559
if (!config.scheduler) {
6660
throw std::runtime_error("ThreadConfig::scheduler cannot be nullptr");
6761
}
@@ -123,12 +117,16 @@ class Thread {
123117
*
124118
* @private
125119
*/
126-
inline void SetApp(App* app) {
127-
app_ = app;
120+
inline void SetTraceAggregator(std::weak_ptr<tracing::TraceAggregator> trace_aggregator) {
121+
trace_aggregator_ = trace_aggregator;
128122
}
129123

130124
protected:
131-
inline quill::Logger* Logger() const { return logger_; }
125+
inline quill::Logger* Logger() const { return logger_; }
126+
127+
/**
128+
* Gets the current tracer object. Should only ever be called from within the thread itself.
129+
*/
132130
inline tracing::ThreadTracer& Tracer() { return *tracer_; }
133131
inline int64_t StartMonotonicTimeNs() const { return start_monotonic_time_ns_; }
134132
inline const ThreadConfig& Config() const noexcept { return config_; }

include/cactus_rt/tracing/thread_tracer.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#ifndef CACTUS_TRACING_THREAD_TRACER_H_
22
#define CACTUS_TRACING_THREAD_TRACER_H_
33

4+
#include <atomic>
45
#ifndef CACTUS_RT_TRACING_ENABLED
56
#include "thread_tracer.disabled.h"
67
#else
@@ -36,6 +37,8 @@ class ThreadTracer {
3637

3738
moodycamel::ReaderWriterQueue<TrackEventInternal> queue_;
3839

40+
std::atomic_bool thread_done_;
41+
3942
// The event name interning must be done per thread (per sequence). Thus it is
4043
// stored here. However, this class must NEVER call functions here (other
4144
// than maybe .Size), as the memory allocation can occur. This variable is
@@ -84,6 +87,26 @@ class ThreadTracer {
8487
*/
8588
void SetTid() noexcept { tid_ = gettid(); }
8689

90+
/**
91+
* @brief This marks this thread tracer as "done" and thus the trace
92+
* aggregator will try to remove it after flushing the data.
93+
*
94+
* @private
95+
*/
96+
void MarkDone() noexcept {
97+
thread_done_.store(true, std::memory_order_release);
98+
}
99+
100+
/**
101+
* @brief Checks if this thread tracer is done. Should only be called from
102+
* TraceAggregator.
103+
*
104+
* @private
105+
*/
106+
bool IsDone() noexcept {
107+
return thread_done_.load(std::memory_order_acquire);
108+
}
109+
87110
private:
88111
template <typename... Args>
89112
bool Emit(Args&&... args) noexcept;

0 commit comments

Comments
 (0)