Skip to content

Commit 2f6f2f8

Browse files
authored
Fix profiler shutdown when isolate is terminated (#215)
* Fix profiler shutdown when isolate is terminated When an isolate is terminated abruptly (eg. by Worker.terminate()), the profiler is not stopped and not removed from the isolate->profiler map. This can lead to a crash. This commit adds a cleanup hook with `node::AddEnvironmentCleanupHook` that is called when the isolate is destroyed. The cleanup hook stops the profiler and removes it from the profiler map.
1 parent bfa2637 commit 2f6f2f8

File tree

6 files changed

+121
-22
lines changed

6 files changed

+121
-22
lines changed

.github/workflows/build.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ jobs:
4040
target-name: 'dd_pprof' # target name in binding.gyp
4141
package-manager: 'npm' # npm or yarn
4242
cache: true # enable caching of dependencies based on lockfile
43-
min-node-version: 16
43+
min-node-version: 18
4444
skip: 'linux-arm,linux-ia32' # skip building for these platforms
4545

4646
dev_publish:

.github/workflows/package-size.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ jobs:
1919
- name: Setup Node.js
2020
uses: actions/setup-node@v2
2121
with:
22-
node-version: '16'
22+
node-version: '22'
2323
- run: yarn
2424
- name: Compute module size tree and report
2525
uses: qard/heaviest-objects-in-the-universe@v1

bindings/profilers/wall.cc

Lines changed: 57 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include <limits>
2020
#include <memory>
2121
#include <mutex>
22+
#include <type_traits>
2223
#include <vector>
2324

2425
#include <nan.h>
@@ -120,6 +121,18 @@ class ProtectedProfilerMap {
120121
return profiler;
121122
}
122123

124+
WallProfiler* RemoveProfilerForIsolate(const v8::Isolate* isolate) {
125+
return UpdateProfilers([isolate](auto map) {
126+
auto it = map->find(isolate);
127+
if (it != map->end()) {
128+
auto profiler = it->second;
129+
map->erase(it);
130+
return profiler;
131+
}
132+
return static_cast<WallProfiler*>(nullptr);
133+
});
134+
}
135+
123136
bool RemoveProfiler(const v8::Isolate* isolate, WallProfiler* profiler) {
124137
return UpdateProfilers([isolate, profiler, this](auto map) {
125138
terminatedWorkersCpu_ += profiler->GetAndResetThreadCpu();
@@ -177,8 +190,10 @@ class ProtectedProfilerMap {
177190
}
178191

179192
private:
193+
using ProfilerMap = std::unordered_map<const Isolate*, WallProfiler*>;
194+
180195
template <typename F>
181-
bool UpdateProfilers(F updateFn) {
196+
std::invoke_result_t<F, ProfilerMap*> UpdateProfilers(F updateFn) {
182197
// use mutex to prevent two isolates of updating profilers concurrently
183198
std::lock_guard<std::mutex> lock(update_mutex_);
184199

@@ -207,7 +222,6 @@ class ProtectedProfilerMap {
207222
return res;
208223
}
209224

210-
using ProfilerMap = std::unordered_map<const Isolate*, WallProfiler*>;
211225
mutable std::atomic<ProfilerMap*> profilers_;
212226
std::mutex update_mutex_;
213227
bool init_ = false;
@@ -366,6 +380,27 @@ static int64_t GetV8ToEpochOffset() {
366380
return V8toEpochOffset;
367381
}
368382

383+
void WallProfiler::CleanupHook(void* data) {
384+
auto isolate = static_cast<Isolate*>(data);
385+
auto prof = g_profilers.RemoveProfilerForIsolate(isolate);
386+
if (prof) {
387+
prof->Cleanup(isolate);
388+
delete prof;
389+
}
390+
}
391+
392+
// This is only called when isolate is terminated without `beforeExit`
393+
// notification.
394+
void WallProfiler::Cleanup(Isolate* isolate) {
395+
if (started_) {
396+
cpuProfiler_->Stop(profileId_);
397+
if (interceptSignal()) {
398+
SignalHandler::DecreaseUseCount();
399+
}
400+
Dispose(isolate, false);
401+
}
402+
}
403+
369404
ContextsByNode WallProfiler::GetContextsByNode(CpuProfile* profile,
370405
ContextBuffer& contexts,
371406
int64_t startCpuTime) {
@@ -547,21 +582,22 @@ WallProfiler::WallProfiler(std::chrono::microseconds samplingPeriod,
547582
}
548583
}
549584

550-
WallProfiler::~WallProfiler() {
551-
Dispose(nullptr);
552-
}
553-
554-
void WallProfiler::Dispose(Isolate* isolate) {
585+
void WallProfiler::Dispose(Isolate* isolate, bool removeFromMap) {
555586
if (cpuProfiler_ != nullptr) {
556587
cpuProfiler_->Dispose();
557588
cpuProfiler_ = nullptr;
558589

559-
g_profilers.RemoveProfiler(isolate, this);
590+
if (removeFromMap) {
591+
g_profilers.RemoveProfiler(isolate, this);
592+
}
560593

561-
if (isolate != nullptr && collectAsyncId_) {
594+
if (collectAsyncId_) {
562595
isolate->RemoveGCPrologueCallback(&GCPrologueCallback, this);
563596
isolate->RemoveGCEpilogueCallback(&GCEpilogueCallback, this);
564597
}
598+
599+
node::RemoveEnvironmentCleanupHook(
600+
isolate, &WallProfiler::CleanupHook, isolate);
565601
}
566602
}
567603

@@ -702,17 +738,19 @@ Result WallProfiler::StartImpl() {
702738
: CollectionMode::kNoCollect);
703739
collectionMode_.store(collectionMode, std::memory_order_relaxed);
704740
started_ = true;
741+
auto isolate = Isolate::GetCurrent();
742+
node::AddEnvironmentCleanupHook(isolate, &WallProfiler::CleanupHook, isolate);
705743
return {};
706744
}
707745

708-
std::string WallProfiler::StartInternal() {
746+
v8::ProfilerId WallProfiler::StartInternal() {
709747
// Reuse the same names for the profiles because strings used for profile
710748
// names are not released until v8::CpuProfiler object is destroyed.
711749
// https://github.com/nodejs/node/blob/b53c51995380b1f8d642297d848cab6010d2909c/deps/v8/src/profiler/profile-generator.h#L516
712750
char buf[128];
713751
snprintf(buf, sizeof(buf), "pprof-%" PRId64, (profileIdx_++) % 2);
714752
v8::Local<v8::String> title = Nan::New<String>(buf).ToLocalChecked();
715-
cpuProfiler_->StartProfiling(
753+
auto result = cpuProfiler_->Start(
716754
title,
717755
includeLines_ ? CpuProfilingMode::kCallerLineNumbers
718756
: CpuProfilingMode::kLeafNodeLineNumbers,
@@ -752,7 +790,7 @@ std::string WallProfiler::StartInternal() {
752790
cpuProfiler_->CollectSample(v8::Isolate::GetCurrent());
753791
}
754792

755-
return buf;
793+
return result.id;
756794
}
757795

758796
NAN_METHOD(WallProfiler::Stop) {
@@ -837,12 +875,11 @@ Result WallProfiler::StopImpl(bool restart, v8::Local<v8::Value>& profile) {
837875
std::atomic_signal_fence(std::memory_order_acquire);
838876
}
839877

840-
if (withContexts_ || workaroundV8Bug_) {
878+
if (interceptSignal()) {
841879
SignalHandler::DecreaseUseCount();
842880
}
843881

844-
auto v8_profile = cpuProfiler_->StopProfiling(
845-
Nan::New<String>(oldProfileId).ToLocalChecked());
882+
auto v8_profile = cpuProfiler_->Stop(oldProfileId);
846883

847884
ContextBuffer contexts;
848885
if (withContexts_) {
@@ -896,7 +933,7 @@ Result WallProfiler::StopImpl(bool restart, v8::Local<v8::Value>& profile) {
896933
v8_profile->Delete();
897934

898935
if (!restart) {
899-
Dispose(v8::Isolate::GetCurrent());
936+
Dispose(v8::Isolate::GetCurrent(), true);
900937
} else if (workaroundV8Bug_) {
901938
waitForSignal(callCount + 1);
902939
collectionMode_.store(withContexts_ ? CollectionMode::kCollectContexts
@@ -1017,6 +1054,10 @@ NAN_METHOD(WallProfiler::V8ProfilerStuckEventLoopDetected) {
10171054

10181055
NAN_METHOD(WallProfiler::Dispose) {
10191056
auto profiler = Nan::ObjectWrap::Unwrap<WallProfiler>(info.This());
1057+
// Profiler must already be stopped when this is called.
1058+
if (profiler->started_) {
1059+
return Nan::ThrowTypeError("Profiler is still running, stop it first.");
1060+
}
10201061
delete profiler;
10211062
}
10221063

bindings/profilers/wall.hh

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ class WallProfiler : public Nan::ObjectWrap {
6262

6363
std::atomic<CollectionMode> collectionMode_;
6464
std::atomic<uint64_t> noCollectCallCount_;
65-
std::string profileId_;
65+
v8::ProfilerId profileId_;
6666
uint64_t profileIdx_ = 0;
6767
bool includeLines_ = false;
6868
bool withContexts_ = false;
@@ -92,8 +92,8 @@ class WallProfiler : public Nan::ObjectWrap {
9292
using ContextBuffer = std::vector<SampleContext>;
9393
ContextBuffer contexts_;
9494

95-
~WallProfiler();
96-
void Dispose(v8::Isolate* isolate);
95+
~WallProfiler() = default;
96+
void Dispose(v8::Isolate* isolate, bool removeFromMap);
9797

9898
// A new CPU profiler object will be created each time profiling is started
9999
// to work around https://bugs.chromium.org/p/v8/issues/detail?id=11051.
@@ -104,6 +104,8 @@ class WallProfiler : public Nan::ObjectWrap {
104104
int64_t startCpuTime);
105105

106106
bool waitForSignal(uint64_t targetCallCount = 0);
107+
static void CleanupHook(void* data);
108+
void Cleanup(v8::Isolate* isolate);
107109

108110
public:
109111
/**
@@ -129,7 +131,7 @@ class WallProfiler : public Nan::ObjectWrap {
129131
int64_t cpu_time,
130132
double async_id);
131133
Result StartImpl();
132-
std::string StartInternal();
134+
v8::ProfilerId StartInternal();
133135
Result StopImpl(bool restart, v8::Local<v8::Value>& profile);
134136

135137
CollectionMode collectionMode() {
@@ -143,6 +145,8 @@ class WallProfiler : public Nan::ObjectWrap {
143145

144146
bool collectCpuTime() const { return collectCpuTime_; }
145147

148+
bool interceptSignal() const { return withContexts_ || workaroundV8Bug_; }
149+
146150
int v8ProfilerStuckEventLoopDetected() const {
147151
return v8ProfilerStuckEventLoopDetected_;
148152
}

ts/test/test-worker-threads.ts

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// eslint-disable-next-line node/no-unsupported-features/node-builtins
22
import {execFile} from 'child_process';
33
import {promisify} from 'util';
4+
import {Worker} from 'worker_threads';
45

56
const exec = promisify(execFile);
67

@@ -11,4 +12,34 @@ describe('Worker Threads', () => {
1112
const nbWorkers = 2;
1213
return exec('node', ['./out/test/worker.js', String(nbWorkers)]);
1314
});
15+
16+
it('should not crash when worker is terminated', async function () {
17+
this.timeout(30000);
18+
const nruns = 5;
19+
const concurrentWorkers = 20;
20+
for (let i = 0; i < nruns; i++) {
21+
const workers = [];
22+
for (let j = 0; j < concurrentWorkers; j++) {
23+
const worker = new Worker('./out/test/worker2.js');
24+
worker.postMessage('hello');
25+
26+
worker.on('message', () => {
27+
worker.terminate();
28+
});
29+
30+
workers.push(
31+
new Promise<void>((resolve, reject) => {
32+
worker.on('exit', exitCode => {
33+
if (exitCode === 1) {
34+
resolve();
35+
} else {
36+
reject(new Error('Worker exited with code 0'));
37+
}
38+
});
39+
})
40+
);
41+
}
42+
await Promise.all(workers);
43+
}
44+
});
1445
});

ts/test/worker2.ts

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import {parentPort} from 'node:worker_threads';
2+
import {time} from '../src/index';
3+
4+
const delay = (ms: number) => new Promise(res => setTimeout(res, ms));
5+
6+
const DURATION_MILLIS = 1000;
7+
const INTERVAL_MICROS = 10000;
8+
const withContexts =
9+
process.platform === 'darwin' || process.platform === 'linux';
10+
11+
time.start({
12+
durationMillis: DURATION_MILLIS,
13+
intervalMicros: INTERVAL_MICROS,
14+
withContexts: withContexts,
15+
collectCpuTime: withContexts,
16+
collectAsyncId: false,
17+
});
18+
19+
parentPort?.on('message', () => {
20+
delay(50).then(() => {
21+
parentPort?.postMessage('hello');
22+
});
23+
});

0 commit comments

Comments
 (0)