-
Notifications
You must be signed in to change notification settings - Fork 3
[SCP-759] Fix profiler shutdown when isolate is terminated #215
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,7 @@ | |
#include <limits> | ||
#include <memory> | ||
#include <mutex> | ||
#include <type_traits> | ||
#include <vector> | ||
|
||
#include <nan.h> | ||
|
@@ -120,6 +121,18 @@ class ProtectedProfilerMap { | |
return profiler; | ||
} | ||
|
||
WallProfiler* RemoveProfilerForIsolate(const v8::Isolate* isolate) { | ||
return UpdateProfilers([isolate](auto map) { | ||
auto it = map->find(isolate); | ||
if (it != map->end()) { | ||
auto profiler = it->second; | ||
map->erase(it); | ||
return profiler; | ||
} | ||
return static_cast<WallProfiler*>(nullptr); | ||
}); | ||
} | ||
|
||
bool RemoveProfiler(const v8::Isolate* isolate, WallProfiler* profiler) { | ||
return UpdateProfilers([isolate, profiler, this](auto map) { | ||
terminatedWorkersCpu_ += profiler->GetAndResetThreadCpu(); | ||
|
@@ -177,8 +190,10 @@ class ProtectedProfilerMap { | |
} | ||
|
||
private: | ||
using ProfilerMap = std::unordered_map<const Isolate*, WallProfiler*>; | ||
|
||
template <typename F> | ||
bool UpdateProfilers(F updateFn) { | ||
std::invoke_result_t<F, ProfilerMap*> UpdateProfilers(F updateFn) { | ||
// use mutex to prevent two isolates of updating profilers concurrently | ||
std::lock_guard<std::mutex> lock(update_mutex_); | ||
|
||
|
@@ -207,7 +222,6 @@ class ProtectedProfilerMap { | |
return res; | ||
} | ||
|
||
using ProfilerMap = std::unordered_map<const Isolate*, WallProfiler*>; | ||
mutable std::atomic<ProfilerMap*> profilers_; | ||
std::mutex update_mutex_; | ||
bool init_ = false; | ||
|
@@ -366,6 +380,27 @@ static int64_t GetV8ToEpochOffset() { | |
return V8toEpochOffset; | ||
} | ||
|
||
void WallProfiler::CleanupHook(void* data) { | ||
auto isolate = static_cast<Isolate*>(data); | ||
auto prof = g_profilers.RemoveProfilerForIsolate(isolate); | ||
if (prof) { | ||
prof->Cleanup(isolate); | ||
delete prof; | ||
} | ||
} | ||
|
||
// This is only called when isolate is terminated without `beforeExit` | ||
// notification. | ||
void WallProfiler::Cleanup(Isolate* isolate) { | ||
if (started_) { | ||
cpuProfiler_->Stop(profileId_); | ||
if (interceptSignal()) { | ||
SignalHandler::DecreaseUseCount(); | ||
} | ||
Dispose(isolate, false); | ||
} | ||
} | ||
|
||
ContextsByNode WallProfiler::GetContextsByNode(CpuProfile* profile, | ||
ContextBuffer& contexts, | ||
int64_t startCpuTime) { | ||
|
@@ -547,21 +582,22 @@ WallProfiler::WallProfiler(std::chrono::microseconds samplingPeriod, | |
} | ||
} | ||
|
||
WallProfiler::~WallProfiler() { | ||
Dispose(nullptr); | ||
} | ||
|
||
void WallProfiler::Dispose(Isolate* isolate) { | ||
void WallProfiler::Dispose(Isolate* isolate, bool removeFromMap) { | ||
if (cpuProfiler_ != nullptr) { | ||
cpuProfiler_->Dispose(); | ||
cpuProfiler_ = nullptr; | ||
|
||
g_profilers.RemoveProfiler(isolate, this); | ||
if (removeFromMap) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You could have one big There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I removed the call to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Well, that's one way to simplify it 😁. I like it. I did feel our disposal and destruction story was a bit overcomplicated, so this is 👍. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
g_profilers.RemoveProfiler(isolate, this); | ||
} | ||
|
||
if (isolate != nullptr && collectAsyncId_) { | ||
if (collectAsyncId_) { | ||
isolate->RemoveGCPrologueCallback(&GCPrologueCallback, this); | ||
isolate->RemoveGCEpilogueCallback(&GCEpilogueCallback, this); | ||
} | ||
|
||
node::RemoveEnvironmentCleanupHook( | ||
isolate, &WallProfiler::CleanupHook, isolate); | ||
} | ||
} | ||
|
||
|
@@ -702,17 +738,19 @@ Result WallProfiler::StartImpl() { | |
: CollectionMode::kNoCollect); | ||
collectionMode_.store(collectionMode, std::memory_order_relaxed); | ||
started_ = true; | ||
auto isolate = Isolate::GetCurrent(); | ||
node::AddEnvironmentCleanupHook(isolate, &WallProfiler::CleanupHook, isolate); | ||
return {}; | ||
} | ||
|
||
std::string WallProfiler::StartInternal() { | ||
v8::ProfilerId WallProfiler::StartInternal() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nice that we can use this now 😄 |
||
// Reuse the same names for the profiles because strings used for profile | ||
// names are not released until v8::CpuProfiler object is destroyed. | ||
// https://github.com/nodejs/node/blob/b53c51995380b1f8d642297d848cab6010d2909c/deps/v8/src/profiler/profile-generator.h#L516 | ||
char buf[128]; | ||
snprintf(buf, sizeof(buf), "pprof-%" PRId64, (profileIdx_++) % 2); | ||
v8::Local<v8::String> title = Nan::New<String>(buf).ToLocalChecked(); | ||
cpuProfiler_->StartProfiling( | ||
auto result = cpuProfiler_->Start( | ||
title, | ||
includeLines_ ? CpuProfilingMode::kCallerLineNumbers | ||
: CpuProfilingMode::kLeafNodeLineNumbers, | ||
|
@@ -752,7 +790,7 @@ std::string WallProfiler::StartInternal() { | |
cpuProfiler_->CollectSample(v8::Isolate::GetCurrent()); | ||
} | ||
|
||
return buf; | ||
return result.id; | ||
} | ||
|
||
NAN_METHOD(WallProfiler::Stop) { | ||
|
@@ -837,12 +875,11 @@ Result WallProfiler::StopImpl(bool restart, v8::Local<v8::Value>& profile) { | |
std::atomic_signal_fence(std::memory_order_acquire); | ||
} | ||
|
||
if (withContexts_ || workaroundV8Bug_) { | ||
if (interceptSignal()) { | ||
SignalHandler::DecreaseUseCount(); | ||
} | ||
|
||
auto v8_profile = cpuProfiler_->StopProfiling( | ||
Nan::New<String>(oldProfileId).ToLocalChecked()); | ||
auto v8_profile = cpuProfiler_->Stop(oldProfileId); | ||
|
||
ContextBuffer contexts; | ||
if (withContexts_) { | ||
|
@@ -896,7 +933,7 @@ Result WallProfiler::StopImpl(bool restart, v8::Local<v8::Value>& profile) { | |
v8_profile->Delete(); | ||
|
||
if (!restart) { | ||
Dispose(v8::Isolate::GetCurrent()); | ||
Dispose(v8::Isolate::GetCurrent(), true); | ||
} else if (workaroundV8Bug_) { | ||
waitForSignal(callCount + 1); | ||
collectionMode_.store(withContexts_ ? CollectionMode::kCollectContexts | ||
|
@@ -1017,6 +1054,10 @@ NAN_METHOD(WallProfiler::V8ProfilerStuckEventLoopDetected) { | |
|
||
NAN_METHOD(WallProfiler::Dispose) { | ||
auto profiler = Nan::ObjectWrap::Unwrap<WallProfiler>(info.This()); | ||
// Profiler must already be stopped when this is called. | ||
if (profiler->started_) { | ||
return Nan::ThrowTypeError("Profiler is still running, stop it first."); | ||
} | ||
delete profiler; | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
// eslint-disable-next-line node/no-unsupported-features/node-builtins | ||
import {execFile} from 'child_process'; | ||
import {promisify} from 'util'; | ||
import {Worker} from 'worker_threads'; | ||
|
||
const exec = promisify(execFile); | ||
|
||
|
@@ -11,4 +12,34 @@ describe('Worker Threads', () => { | |
const nbWorkers = 2; | ||
return exec('node', ['./out/test/worker.js', String(nbWorkers)]); | ||
}); | ||
|
||
it('should not crash when worker is terminated', async function () { | ||
this.timeout(30000); | ||
const nruns = 5; | ||
const concurrentWorkers = 20; | ||
for (let i = 0; i < nruns; i++) { | ||
const workers = []; | ||
for (let j = 0; j < concurrentWorkers; j++) { | ||
const worker = new Worker('./out/test/worker2.js'); | ||
worker.postMessage('hello'); | ||
|
||
worker.on('message', () => { | ||
worker.terminate(); | ||
}); | ||
|
||
workers.push( | ||
new Promise<void>((resolve, reject) => { | ||
worker.on('exit', exitCode => { | ||
if (exitCode === 1) { | ||
resolve(); | ||
} else { | ||
reject(new Error('Worker exited with code 0')); | ||
} | ||
}); | ||
}) | ||
); | ||
} | ||
await Promise.all(workers); | ||
} | ||
}); | ||
Comment on lines
+16
to
+44
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
}); |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
import {parentPort} from 'node:worker_threads'; | ||
import {time} from '../src/index'; | ||
|
||
const delay = (ms: number) => new Promise(res => setTimeout(res, ms)); | ||
|
||
const DURATION_MILLIS = 1000; | ||
const INTERVAL_MICROS = 10000; | ||
const withContexts = | ||
process.platform === 'darwin' || process.platform === 'linux'; | ||
|
||
time.start({ | ||
durationMillis: DURATION_MILLIS, | ||
intervalMicros: INTERVAL_MICROS, | ||
withContexts: withContexts, | ||
collectCpuTime: withContexts, | ||
collectAsyncId: false, | ||
}); | ||
|
||
parentPort?.on('message', () => { | ||
delay(50).then(() => { | ||
parentPort?.postMessage('hello'); | ||
}); | ||
}); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Making this a private static method on WallProfiler actually isn't a bad idea. I should probably do an equivalent for GC callbacks so I can make them private.