Skip to content

Commit b7093bf

Browse files
authored
[SYCL] Eliminate XPTI overhead when it is disabled (#18334)
Even if the XPTI collector is not used, we still have some overhead related to XPTI instrumentation when we call `xptiRegisterStream` function. With these changes, we cache the result of the `xptiTraceEnabled` function in the static variables. It is safe to cache it because XPTI collector is enabled by the `XPTI_FRAMEWORK_DISPATCHER` environment variable before the process starts. VTune shows ~2% decrease in the number of instructions retired on a submit kernel path. --------- Signed-off-by: Sergei Vinogradov <[email protected]>
1 parent 1a87eec commit b7093bf

File tree

3 files changed

+54
-35
lines changed

3 files changed

+54
-35
lines changed

sycl/source/detail/graph_impl.cpp

+20-14
Original file line numberDiff line numberDiff line change
@@ -800,19 +800,25 @@ exec_graph_impl::enqueueNodeDirect(sycl::context Ctx,
800800
ur_exp_command_buffer_command_handle_t NewCommand = 0;
801801

802802
#ifdef XPTI_ENABLE_INSTRUMENTATION
803-
int32_t StreamID = xptiRegisterStream(sycl::detail::SYCL_STREAM_NAME);
804-
sycl::detail::CGExecKernel *CGExec =
805-
static_cast<sycl::detail::CGExecKernel *>(Node->MCommandGroup.get());
806-
sycl::detail::code_location CodeLoc(CGExec->MFileName.c_str(),
807-
CGExec->MFunctionName.c_str(),
808-
CGExec->MLine, CGExec->MColumn);
809-
auto [CmdTraceEvent, InstanceID] = emitKernelInstrumentationData(
810-
StreamID, CGExec->MSyclKernel, CodeLoc, CGExec->MIsTopCodeLoc,
811-
CGExec->MKernelName.data(), nullptr, CGExec->MNDRDesc,
812-
CGExec->MKernelBundle, CGExec->MArgs);
813-
if (CmdTraceEvent)
814-
sycl::detail::emitInstrumentationGeneral(
815-
StreamID, InstanceID, CmdTraceEvent, xpti::trace_task_begin, nullptr);
803+
const bool xptiEnabled = xptiTraceEnabled();
804+
int32_t StreamID = xpti::invalid_id;
805+
xpti_td *CmdTraceEvent = nullptr;
806+
uint64_t InstanceID = 0;
807+
if (xptiEnabled) {
808+
StreamID = xptiRegisterStream(sycl::detail::SYCL_STREAM_NAME);
809+
sycl::detail::CGExecKernel *CGExec =
810+
static_cast<sycl::detail::CGExecKernel *>(Node->MCommandGroup.get());
811+
sycl::detail::code_location CodeLoc(CGExec->MFileName.c_str(),
812+
CGExec->MFunctionName.c_str(),
813+
CGExec->MLine, CGExec->MColumn);
814+
std::tie(CmdTraceEvent, InstanceID) = emitKernelInstrumentationData(
815+
StreamID, CGExec->MSyclKernel, CodeLoc, CGExec->MIsTopCodeLoc,
816+
CGExec->MKernelName.data(), nullptr, CGExec->MNDRDesc,
817+
CGExec->MKernelBundle, CGExec->MArgs);
818+
if (CmdTraceEvent)
819+
sycl::detail::emitInstrumentationGeneral(
820+
StreamID, InstanceID, CmdTraceEvent, xpti::trace_task_begin, nullptr);
821+
}
816822
#endif
817823

818824
ur_result_t Res = sycl::detail::enqueueImpCommandBufferKernel(
@@ -830,7 +836,7 @@ exec_graph_impl::enqueueNodeDirect(sycl::context Ctx,
830836
}
831837

832838
#ifdef XPTI_ENABLE_INSTRUMENTATION
833-
if (CmdTraceEvent)
839+
if (xptiEnabled && CmdTraceEvent)
834840
sycl::detail::emitInstrumentationGeneral(
835841
StreamID, InstanceID, CmdTraceEvent, xpti::trace_task_end, nullptr);
836842
#endif

sycl/source/detail/queue_impl.cpp

+9-3
Original file line numberDiff line numberDiff line change
@@ -603,11 +603,15 @@ void queue_impl::instrumentationEpilog(void *TelemetryEvent, std::string &Name,
603603
void queue_impl::wait(const detail::code_location &CodeLoc) {
604604
(void)CodeLoc;
605605
#ifdef XPTI_ENABLE_INSTRUMENTATION
606+
const bool xptiEnabled = xptiTraceEnabled();
606607
void *TelemetryEvent = nullptr;
607608
uint64_t IId;
608609
std::string Name;
609-
int32_t StreamID = xptiRegisterStream(SYCL_STREAM_NAME);
610-
TelemetryEvent = instrumentationProlog(CodeLoc, Name, StreamID, IId);
610+
int32_t StreamID = xpti::invalid_id;
611+
if (xptiEnabled) {
612+
StreamID = xptiRegisterStream(SYCL_STREAM_NAME);
613+
TelemetryEvent = instrumentationProlog(CodeLoc, Name, StreamID, IId);
614+
}
611615
#endif
612616

613617
if (MGraph.lock()) {
@@ -675,7 +679,9 @@ void queue_impl::wait(const detail::code_location &CodeLoc) {
675679
Event->wait(Event);
676680

677681
#ifdef XPTI_ENABLE_INSTRUMENTATION
678-
instrumentationEpilog(TelemetryEvent, Name, StreamID, IId);
682+
if (xptiEnabled) {
683+
instrumentationEpilog(TelemetryEvent, Name, StreamID, IId);
684+
}
679685
#endif
680686
}
681687

sycl/source/handler.cpp

+25-18
Original file line numberDiff line numberDiff line change
@@ -533,19 +533,23 @@ event handler::finalize() {
533533
}
534534

535535
#ifdef XPTI_ENABLE_INSTRUMENTATION
536-
// uint32_t StreamID, uint64_t InstanceID, xpti_td* TraceEvent,
537-
int32_t StreamID = xptiRegisterStream(detail::SYCL_STREAM_NAME);
538-
auto [CmdTraceEvent, InstanceID] = emitKernelInstrumentationData(
539-
StreamID, MKernel, MCodeLoc, impl->MIsTopCodeLoc, MKernelName.data(),
540-
MQueue, impl->MNDRDesc, KernelBundleImpPtr, impl->MArgs);
541-
auto EnqueueKernel = [&, CmdTraceEvent = CmdTraceEvent,
542-
InstanceID = InstanceID]() {
543-
#else
544-
auto EnqueueKernel = [&]() {
536+
const bool xptiEnabled = xptiTraceEnabled();
545537
#endif
538+
auto EnqueueKernel = [&]() {
546539
#ifdef XPTI_ENABLE_INSTRUMENTATION
547-
detail::emitInstrumentationGeneral(StreamID, InstanceID, CmdTraceEvent,
548-
xpti::trace_task_begin, nullptr);
540+
int32_t StreamID = xpti::invalid_id;
541+
xpti_td *CmdTraceEvent = nullptr;
542+
uint64_t InstanceID = 0;
543+
if (xptiEnabled) {
544+
StreamID = xptiRegisterStream(detail::SYCL_STREAM_NAME);
545+
std::tie(CmdTraceEvent, InstanceID) = emitKernelInstrumentationData(
546+
StreamID, MKernel, MCodeLoc, impl->MIsTopCodeLoc,
547+
MKernelName.data(), MQueue, impl->MNDRDesc, KernelBundleImpPtr,
548+
impl->MArgs);
549+
detail::emitInstrumentationGeneral(StreamID, InstanceID,
550+
CmdTraceEvent,
551+
xpti::trace_task_begin, nullptr);
552+
}
549553
#endif
550554
const detail::RTDeviceBinaryImage *BinImage = nullptr;
551555
if (detail::SYCLConfig<detail::SYCL_JIT_AMDGCN_PTX_KERNELS>::get()) {
@@ -561,14 +565,17 @@ event handler::finalize() {
561565
impl->MKernelUsesClusterLaunch,
562566
impl->MKernelWorkGroupMemorySize, BinImage);
563567
#ifdef XPTI_ENABLE_INSTRUMENTATION
564-
// Emit signal only when event is created
565-
if (!DiscardEvent) {
566-
detail::emitInstrumentationGeneral(
567-
StreamID, InstanceID, CmdTraceEvent, xpti::trace_signal,
568-
static_cast<const void *>(LastEventImpl->getHandle()));
568+
if (xptiEnabled) {
569+
// Emit signal only when event is created
570+
if (!DiscardEvent) {
571+
detail::emitInstrumentationGeneral(
572+
StreamID, InstanceID, CmdTraceEvent, xpti::trace_signal,
573+
static_cast<const void *>(LastEventImpl->getHandle()));
574+
}
575+
detail::emitInstrumentationGeneral(StreamID, InstanceID,
576+
CmdTraceEvent,
577+
xpti::trace_task_end, nullptr);
569578
}
570-
detail::emitInstrumentationGeneral(StreamID, InstanceID, CmdTraceEvent,
571-
xpti::trace_task_end, nullptr);
572579
#endif
573580
};
574581

0 commit comments

Comments
 (0)