Skip to content

Commit 9f398f2

Browse files
committed
Allow use of CPED to store sampling context
1 parent e28fc06 commit 9f398f2

File tree

3 files changed

+180
-41
lines changed

3 files changed

+180
-41
lines changed

bindings/profilers/wall.cc

Lines changed: 124 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ using namespace v8;
5858

5959
namespace dd {
6060

61+
using ContextPtr = std::shared_ptr<Global<Value>>;
62+
6163
// Maximum number of rounds in the GetV8ToEpochOffset
6264
static constexpr int MAX_EPOCH_OFFSET_ATTEMPTS = 20;
6365

@@ -321,8 +323,7 @@ void SignalHandler::HandleProfilerSignal(int sig,
321323
auto time_from = Now();
322324
old_handler(sig, info, context);
323325
auto time_to = Now();
324-
double async_id = node::AsyncHooksGetExecutionAsyncId(isolate);
325-
prof->PushContext(time_from, time_to, cpu_time, async_id);
326+
prof->PushContext(time_from, time_to, cpu_time, isolate);
326327
}
327328
#else
328329
class SignalHandler {
@@ -484,14 +485,24 @@ ContextsByNode WallProfiler::GetContextsByNode(CpuProfile* profile,
484485
return contextsByNode;
485486
}
486487

488+
// GC prologue hook installed through Isolate::AddGCPrologueCallback.
// `data` is the opaque pointer passed at registration; it is interpreted as
// the owning WallProfiler and the event is forwarded to OnGCStart().
// The isolate, GC type and flags are not used.
void GCPrologueCallback(Isolate* isolate, GCType type, GCCallbackFlags flags, void* data) {
  auto* profiler = static_cast<WallProfiler*>(data);
  profiler->OnGCStart();
}
491+
492+
// GC epilogue hook installed through Isolate::AddGCEpilogueCallback.
// `data` is the opaque pointer passed at registration; it is interpreted as
// the owning WallProfiler and the event is forwarded to OnGCEnd().
// The isolate, GC type and flags are not used.
void GCEpilogueCallback(Isolate* isolate, GCType type, GCCallbackFlags flags, void* data) {
  auto* profiler = static_cast<WallProfiler*>(data);
  profiler->OnGCEnd();
}
495+
487496
WallProfiler::WallProfiler(std::chrono::microseconds samplingPeriod,
488497
std::chrono::microseconds duration,
489498
bool includeLines,
490499
bool withContexts,
491500
bool workaroundV8Bug,
492501
bool collectCpuTime,
493-
bool isMainThread)
502+
bool isMainThread,
503+
bool useCPED)
494504
: samplingPeriod_(samplingPeriod),
505+
useCPED_(useCPED),
495506
includeLines_(includeLines),
496507
withContexts_(withContexts),
497508
isMainThread_(isMainThread) {
@@ -506,7 +517,6 @@ WallProfiler::WallProfiler(std::chrono::microseconds samplingPeriod,
506517
contexts_.reserve(duration * 2 / samplingPeriod);
507518
}
508519

509-
curContext_.store(&context1_, std::memory_order_relaxed);
510520
collectionMode_.store(CollectionMode::kNoCollect, std::memory_order_relaxed);
511521

512522
auto isolate = v8::Isolate::GetCurrent();
@@ -515,13 +525,16 @@ WallProfiler::WallProfiler(std::chrono::microseconds samplingPeriod,
515525

516526
v8::Local<v8::Uint32Array> jsArray =
517527
v8::Uint32Array::New(buffer, 0, kFieldCount);
518-
#if (V8_MAJOR_VERSION >= 8)
519528
fields_ = static_cast<uint32_t*>(buffer->GetBackingStore()->Data());
520-
#else
521-
fields_ = static_cast<uint32_t*>(buffer->GetContents().Data());
522-
#endif
523529
jsArray_ = v8::Global<v8::Uint32Array>(isolate, jsArray);
524530
std::fill(fields_, fields_ + kFieldCount, 0);
531+
532+
gcCount = 0;
533+
if (useCPED_) {
534+
cpedSymbol_ = v8::Global<v8::Symbol>(isolate, v8::Symbol::New(isolate));
535+
isolate->AddGCPrologueCallback(&GCPrologueCallback, this);
536+
isolate->AddGCEpilogueCallback(&GCEpilogueCallback, this);
537+
}
525538
}
526539

527540
WallProfiler::~WallProfiler() {
@@ -535,6 +548,10 @@ void WallProfiler::Dispose(Isolate* isolate) {
535548

536549
g_profilers.RemoveProfiler(isolate, this);
537550
}
551+
if (isolate != nullptr && useCPED_) {
552+
isolate->RemoveGCPrologueCallback(&GCPrologueCallback, this);
553+
isolate->RemoveGCEpilogueCallback(&GCEpilogueCallback, this);
554+
}
538555
}
539556

540557
NAN_METHOD(WallProfiler::New) {
@@ -647,13 +664,23 @@ NAN_METHOD(WallProfiler::New) {
647664
"Include line option is not compatible with contexts.");
648665
}
649666

667+
auto useCPEDValue =
668+
Nan::Get(arg, Nan::New<v8::String>("useCPED").ToLocalChecked());
669+
if (useCPEDValue.IsEmpty() ||
670+
!useCPEDValue.ToLocalChecked()->IsBoolean()) {
671+
return Nan::ThrowTypeError("useCPED must be a boolean.");
672+
}
673+
bool useCPED =
674+
useCPEDValue.ToLocalChecked().As<v8::Boolean>()->Value();
675+
650676
WallProfiler* obj = new WallProfiler(interval,
651677
duration,
652678
lineNumbers,
653679
withContexts,
654680
workaroundV8Bug,
655681
collectCpuTime,
656-
isMainThread);
682+
isMainThread,
683+
useCPED);
657684
obj->Wrap(info.This());
658685
info.GetReturnValue().Set(info.This());
659686
} else {
@@ -966,28 +993,98 @@ v8::CpuProfiler* WallProfiler::CreateV8CpuProfiler() {
966993
}
967994

968995
v8::Local<v8::Value> WallProfiler::GetContext(Isolate* isolate) {
969-
auto context = *curContext_.load(std::memory_order_relaxed);
996+
auto context = GetContextPtr(isolate);
970997
if (!context) return v8::Undefined(isolate);
971998
return context->Get(isolate);
972999
}
9731000

1001+
class PersistentContextPtr : AtomicContextPtr {
1002+
Persistent<Object> per;
1003+
1004+
void BindLifecycleTo(Isolate* isolate, Local<Object>& obj) {
1005+
// Register a callback to delete this object when the object is GCed
1006+
per.Reset(isolate, obj);
1007+
per.SetWeak(this, [](const WeakCallbackInfo<PersistentContextPtr>& data) {
1008+
auto &per = data.GetParameter()->per;
1009+
if (!per.IsEmpty()) {
1010+
per.ClearWeak();
1011+
per.Reset();
1012+
}
1013+
// Using SetSecondPassCallback as shared_ptr can trigger ~Global and any V8 API use needs to be in the second pass
1014+
data.SetSecondPassCallback([](const WeakCallbackInfo<PersistentContextPtr>& data) {
1015+
delete data.GetParameter();
1016+
});
1017+
}, WeakCallbackType::kParameter);
1018+
}
1019+
1020+
friend class WallProfiler;
1021+
};
1022+
9741023
void WallProfiler::SetContext(Isolate* isolate, Local<Value> value) {
975-
// Need to be careful here, because we might be interrupted by a
976-
// signal handler that will make use of curContext_.
977-
// Update of shared_ptr is not atomic, so instead we use a pointer
978-
// (curContext_) that points on two shared_ptr (context1_ and context2_),
979-
// update the shared_ptr that is not currently in use and then atomically
980-
// update curContext_.
981-
auto newCurContext = curContext_.load(std::memory_order_relaxed) == &context1_
982-
? &context2_
983-
: &context1_;
984-
if (!value->IsNullOrUndefined()) {
985-
*newCurContext = std::make_shared<Global<Value>>(isolate, value);
1024+
if (!useCPED_) {
1025+
curContext_.Set(isolate, value);
1026+
return;
1027+
}
1028+
1029+
auto cped = isolate->GetContinuationPreservedEmbedderData();
1030+
// No Node AsyncContextFrame in this continuation yet
1031+
if (!cped->IsObject()) return;
1032+
1033+
auto cpedObj = cped.As<Object>();
1034+
auto localSymbol = cpedSymbol_.Get(isolate);
1035+
auto v8Ctx = isolate->GetCurrentContext();
1036+
auto maybeProfData = cpedObj->Get(v8Ctx, localSymbol);
1037+
if (maybeProfData.IsEmpty()) return;
1038+
auto profData = maybeProfData.ToLocalChecked();
1039+
1040+
PersistentContextPtr* contextPtr = nullptr;
1041+
if (profData->IsUndefined()) {
1042+
contextPtr = new PersistentContextPtr();
1043+
1044+
auto maybeSetResult = cpedObj->Set(v8Ctx, localSymbol, External::New(isolate, contextPtr));
1045+
if (maybeSetResult.IsNothing()) {
1046+
delete contextPtr;
1047+
return;
1048+
}
1049+
contextPtr->BindLifecycleTo(isolate, cpedObj);
9861050
} else {
987-
newCurContext->reset();
1051+
contextPtr = static_cast<PersistentContextPtr*>(profData.As<External>()->Value());
9881052
}
989-
std::atomic_signal_fence(std::memory_order_release);
990-
curContext_.store(newCurContext, std::memory_order_relaxed);
1053+
1054+
contextPtr->Set(isolate, value);
1055+
}
1056+
1057+
// Returns the current sampling context for use by PushContext(), which runs
// inside the profiling signal handler.
ContextPtr WallProfiler::GetContextPtrSignalSafe(Isolate* isolate) {
  // Without CPED the context lives in curContext_. GetContextPtr() would
  // return the same value; answering here just avoids creating a HandleScope.
  if (!useCPED_) return curContext_.Get();

  // While at least one GC is in progress, use the context that OnGCStart()
  // captured rather than reading the CPED.
  if (gcCount != 0) return gcContext;

  // GetContextPtr() creates Local handles, so give them a scope to live in.
  HandleScope scope(isolate);
  return GetContextPtr(isolate);
}
1070+
1071+
// Returns the sampling context that is currently in effect, or an empty
// ContextPtr when there is none.
//
// - Without CPED: the profiler-wide context held in curContext_.
// - With CPED: the context held by the PersistentContextPtr that SetContext()
//   stored, wrapped in a v8::External, under cpedSymbol_ on the
//   continuation-preserved embedder data object.
//
// NOTE(review): this is also reached from the signal handler through
// GetContextPtrSignalSafe(); whether the V8 calls marked "signal safe?" below
// are async-signal-safe is still an open question.
ContextPtr WallProfiler::GetContextPtr(Isolate* isolate) {
  if (!useCPED_) {
    return curContext_.Get();
  }

  auto cped = isolate->GetContinuationPreservedEmbedderData();  // signal safe?
  if (!cped->IsObject()) return ContextPtr();

  auto cpedObj = cped.As<Object>();
  auto localSymbol = cpedSymbol_.Get(isolate);  // signal safe?
  auto maybeProfData = cpedObj->Get(isolate->GetEnteredOrMicrotaskContext(),
                                    localSymbol);  // signal safe?
  Local<Value> profData;
  if (!maybeProfData.ToLocal(&profData)) return ContextPtr();

  // `undefined` means no context was ever set on this CPED object. Any other
  // value that is not an External must not be cast and dereferenced, so it is
  // treated the same way.
  if (!profData->IsExternal()) return ContextPtr();

  auto* contextPtr =
      static_cast<PersistentContextPtr*>(profData.As<External>()->Value());
  return contextPtr->Get();
}
9921089

9931090
NAN_GETTER(WallProfiler::GetContext) {
@@ -1018,14 +1115,13 @@ NAN_METHOD(WallProfiler::Dispose) {
10181115
void WallProfiler::PushContext(int64_t time_from,
10191116
int64_t time_to,
10201117
int64_t cpu_time,
1021-
double async_id) {
1118+
Isolate* isolate) {
10221119
// Be careful this is called in a signal handler context therefore all
10231120
// operations must be async signal safe (in particular no allocations).
10241121
// Our ring buffer avoids allocations.
1025-
auto context = curContext_.load(std::memory_order_relaxed);
1026-
std::atomic_signal_fence(std::memory_order_acquire);
10271122
if (contexts_.size() < contexts_.capacity()) {
1028-
contexts_.push_back({*context, time_from, time_to, cpu_time, async_id});
1123+
double async_id = node::AsyncHooksGetExecutionAsyncId(isolate);
1124+
contexts_.push_back({GetContextPtrSignalSafe(isolate), time_from, time_to, cpu_time, async_id});
10291125
std::atomic_fetch_add_explicit(
10301126
reinterpret_cast<std::atomic<uint32_t>*>(&fields_[kSampleCount]),
10311127
1U,

bindings/profilers/wall.hh

Lines changed: 55 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -36,26 +36,50 @@ struct Result {
3636
std::string msg;
3737
};
3838

39+
using ContextPtr = std::shared_ptr<v8::Global<v8::Value>>;
40+
41+
class AtomicContextPtr {
42+
ContextPtr ptr1;
43+
ContextPtr ptr2;
44+
std::atomic<ContextPtr*> currentPtr;
45+
46+
void Set(v8::Isolate* isolate, v8::Local<v8::Value> value) {
47+
auto newPtr = currentPtr.load(std::memory_order_relaxed) == &ptr1 ? &ptr2 : &ptr1;
48+
if (!value->IsNullOrUndefined()) {
49+
*newPtr = std::make_shared<v8::Global<v8::Value>>(isolate, value);
50+
} else {
51+
newPtr->reset();
52+
}
53+
std::atomic_signal_fence(std::memory_order_release);
54+
currentPtr.store(newPtr, std::memory_order_relaxed);
55+
}
56+
57+
ContextPtr Get() {
58+
auto ptr = currentPtr.load(std::memory_order_relaxed);
59+
std::atomic_signal_fence(std::memory_order_acquire);
60+
return ptr ? *ptr : std::shared_ptr<v8::Global<v8::Value>>();
61+
}
62+
63+
friend class WallProfiler;
64+
};
65+
3966
class WallProfiler : public Nan::ObjectWrap {
4067
public:
4168
enum class CollectionMode { kNoCollect, kPassThrough, kCollectContexts };
4269

4370
private:
4471
enum Fields { kSampleCount, kFieldCount };
4572

46-
using ContextPtr = std::shared_ptr<v8::Global<v8::Value>>;
47-
4873
std::chrono::microseconds samplingPeriod_{0};
4974
v8::CpuProfiler* cpuProfiler_ = nullptr;
50-
// TODO: Investigate use of v8::Persistent instead of shared_ptr<Global> to
51-
// avoid heap allocation. Need to figure out the right move/copy semantics in
52-
// and out of the ring buffer.
5375

54-
// We're using a pair of shared pointers and an atomic pointer-to-current as
55-
// a way to ensure signal safety on update.
56-
ContextPtr context1_;
57-
ContextPtr context2_;
58-
std::atomic<ContextPtr*> curContext_;
76+
// If we aren't using the CPED, we use a single context ptr stored here.
77+
bool useCPED_ = false;
78+
AtomicContextPtr curContext_;
79+
80+
v8::Global<v8::Symbol> cpedSymbol_;
81+
std::atomic<int> gcCount = 0;
82+
ContextPtr gcContext;
5983

6084
std::atomic<CollectionMode> collectionMode_;
6185
std::atomic<uint64_t> noCollectCallCount_;
@@ -100,6 +124,8 @@ class WallProfiler : public Nan::ObjectWrap {
100124
int64_t startCpuTime);
101125

102126
bool waitForSignal(uint64_t targetCallCount = 0);
127+
ContextPtr GetContextPtr(v8::Isolate* isolate);
128+
ContextPtr GetContextPtrSignalSafe(v8::Isolate* isolate);
103129

104130
public:
105131
/**
@@ -108,21 +134,26 @@ class WallProfiler : public Nan::ObjectWrap {
108134
* parameter is informative; it is up to the caller to call the Stop method
109135
* every period. The parameter is used to preallocate data structures that
110136
* should not be reallocated in async signal safe code.
137+
* @param useCPED whether to use the V8 ContinuationPreservedEmbedderData
138+
* to store the current sampling context. It can be used if AsyncLocalStorage
139+
* uses the AsyncContextFrame implementation (experimental in Node 23, default
140+
* in Node 24.)
111141
*/
112142
explicit WallProfiler(std::chrono::microseconds samplingPeriod,
113143
std::chrono::microseconds duration,
114144
bool includeLines,
115145
bool withContexts,
116146
bool workaroundV8bug,
117147
bool collectCpuTime,
118-
bool isMainThread);
148+
bool isMainThread,
149+
bool useCPED);
119150

120151
v8::Local<v8::Value> GetContext(v8::Isolate*);
121152
void SetContext(v8::Isolate*, v8::Local<v8::Value>);
122153
void PushContext(int64_t time_from,
123154
int64_t time_to,
124155
int64_t cpu_time,
125-
double async_id);
156+
v8::Isolate* isolate);
126157
Result StartImpl();
127158
std::string StartInternal();
128159
Result StopImpl(bool restart, v8::Local<v8::Value>& profile);
@@ -146,6 +177,18 @@ class WallProfiler : public Nan::ObjectWrap {
146177
return threadCpuStopWatch_.GetAndReset();
147178
}
148179

180+
void OnGCStart() {
181+
if (gcCount++ == 0) {
182+
gcContext = GetContextPtr(v8::Isolate::GetCurrent());
183+
}
184+
}
185+
186+
void OnGCEnd() {
187+
if (--gcCount == 0) {
188+
gcContext.reset();
189+
}
190+
}
191+
149192
static NAN_METHOD(New);
150193
static NAN_METHOD(Start);
151194
static NAN_METHOD(Stop);

ts/src/time-profiler.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ export function start(options: TimeProfilerOptions = {}) {
9191
throw new Error('Wall profiler is already started');
9292
}
9393

94-
gProfiler = new TimeProfiler({...options, isMainThread});
94+
gProfiler = new TimeProfiler({...options, isMainThread, useCPED: false});
9595
gSourceMapper = options.sourceMapper;
9696
gIntervalMicros = options.intervalMicros!;
9797
gV8ProfilerStuckEventLoopDetected = 0;

0 commit comments

Comments
 (0)