diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 4b8c33bcf469f..c4d7a2a027057 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1621,12 +1621,14 @@ def SYCLType: InheritableAttr { let Subjects = SubjectList<[CXXRecord, Enum], ErrorDiag>; let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost]; let Args = [EnumArgument<"Type", "SYCLType", /*is_string=*/true, - ["accessor", "local_accessor", "work_group_memory", "dynamic_work_group_memory", + ["accessor", "local_accessor", "dynamic_local_accessor", + "work_group_memory", "dynamic_work_group_memory", "specialization_id", "kernel_handler", "buffer_location", "no_alias", "accessor_property_list", "group", "private_memory", "aspect", "annotated_ptr", "annotated_arg", "stream", "sampler", "host_pipe", "multi_ptr"], - ["accessor", "local_accessor", "work_group_memory", "dynamic_work_group_memory", + ["accessor", "local_accessor", "dynamic_local_accessor", + "work_group_memory", "dynamic_work_group_memory", "specialization_id", "kernel_handler", "buffer_location", "no_alias", "accessor_property_list", "group", "private_memory", "aspect", "annotated_ptr", "annotated_arg", diff --git a/clang/include/clang/Sema/SemaSYCL.h b/clang/include/clang/Sema/SemaSYCL.h index 28a9c1859638a..8aac24b8d0079 100644 --- a/clang/include/clang/Sema/SemaSYCL.h +++ b/clang/include/clang/Sema/SemaSYCL.h @@ -64,7 +64,8 @@ class SYCLIntegrationHeader { kind_stream, kind_work_group_memory, kind_dynamic_work_group_memory, - kind_last = kind_dynamic_work_group_memory + kind_dynamic_accessor, + kind_last = kind_dynamic_accessor }; public: diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 9a30b3e693ec2..e4c68b1ee3a25 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -94,7 +94,8 @@ bool SemaSYCL::isSyclType(QualType Ty, SYCLTypeAttr::SYCLType TypeName) { static bool isSyclAccessorType(QualType Ty) { return SemaSYCL::isSyclType(Ty, SYCLTypeAttr::accessor) || - SemaSYCL::isSyclType(Ty, SYCLTypeAttr::local_accessor); + SemaSYCL::isSyclType(Ty, SYCLTypeAttr::local_accessor) || + SemaSYCL::isSyclType(Ty, SYCLTypeAttr::dynamic_local_accessor); } // FIXME: Accessor property lists should be modified to use compile-time @@ -1151,7 +1152,8 @@ static QualType GetSYCLKernelObjectType(const FunctionDecl *KernelCaller) { /// \return the target of given SYCL accessor type static target getAccessTarget(QualType FieldTy, const ClassTemplateSpecializationDecl *AccTy) { - if (SemaSYCL::isSyclType(FieldTy, SYCLTypeAttr::local_accessor)) + if (SemaSYCL::isSyclType(FieldTy, SYCLTypeAttr::local_accessor) || + SemaSYCL::isSyclType(FieldTy, SYCLTypeAttr::dynamic_local_accessor)) return local; return static_cast( @@ -4796,7 +4798,13 @@ class SyclKernelIntHeaderCreator : public SyclKernelFieldHandler { int Dims = static_cast( AccTy->getTemplateArgs()[1].getAsIntegral().getExtValue()); int Info = getAccessTarget(FieldTy, AccTy) | (Dims << 11); - Header.addParamDesc(SYCLIntegrationHeader::kind_accessor, Info, + + SYCLIntegrationHeader::kernel_param_kind_t ParamKind = + SemaSYCL::isSyclType(FieldTy, SYCLTypeAttr::dynamic_local_accessor) + ? SYCLIntegrationHeader::kind_dynamic_accessor + : SYCLIntegrationHeader::kind_accessor; + + Header.addParamDesc(ParamKind, Info, CurOffset + offsetOf(RD, BC.getType()->getAsCXXRecordDecl())); } else if (SemaSYCL::isSyclType(FieldTy, SYCLTypeAttr::work_group_memory)) { @@ -4822,8 +4830,12 @@ class SyclKernelIntHeaderCreator : public SyclKernelFieldHandler { AccTy->getTemplateArgs()[1].getAsIntegral().getExtValue()); int Info = getAccessTarget(FieldTy, AccTy) | (Dims << 11); - Header.addParamDesc(SYCLIntegrationHeader::kind_accessor, Info, - CurOffset + offsetOf(FD, FieldTy)); + SYCLIntegrationHeader::kernel_param_kind_t ParamKind = + SemaSYCL::isSyclType(FieldTy, SYCLTypeAttr::dynamic_local_accessor) + ? SYCLIntegrationHeader::kind_dynamic_accessor + : SYCLIntegrationHeader::kind_accessor; + + Header.addParamDesc(ParamKind, Info, CurOffset + offsetOf(FD, FieldTy)); } else if (SemaSYCL::isSyclType(FieldTy, SYCLTypeAttr::stream)) { addParam(FD, FieldTy, SYCLIntegrationHeader::kind_stream); } else if (SemaSYCL::isSyclType(FieldTy, SYCLTypeAttr::work_group_memory)) { @@ -6037,6 +6049,7 @@ static const char *paramKind2Str(KernelParamKind K) { CASE(pointer); CASE(work_group_memory); CASE(dynamic_work_group_memory); + CASE(dynamic_accessor); } return ""; diff --git a/clang/test/CodeGenSYCL/Inputs/sycl.hpp b/clang/test/CodeGenSYCL/Inputs/sycl.hpp index 0d80872d6aa76..4245fb0f658c9 100644 --- a/clang/test/CodeGenSYCL/Inputs/sycl.hpp +++ b/clang/test/CodeGenSYCL/Inputs/sycl.hpp @@ -440,6 +440,9 @@ local_accessor: public accessor AccessRange, range MemRange, id Offset) {} + +template + friend class dynamic_local_accessor; #endif }; @@ -693,6 +696,23 @@ __SYCL_TYPE(dynamic_work_group_memory) dynamic_work_group_memory { work_group_memory LocalMem; }; +template +class __attribute__((sycl_special_class)) +__SYCL_TYPE(dynamic_local_accessor) dynamic_local_accessor { +public: + dynamic_local_accessor() = default; + +void __init(__attribute__((opencl_local)) DataT *Ptr, + range AccessRange, range range, + id id) { + this->LocalMem.__init(Ptr, AccessRange, range, id); +} + local_accessor get() const { return LocalMem; } + +private: + local_accessor LocalMem; +}; + template class buffer { diff --git a/clang/test/CodeGenSYCL/dynamic_local_accessor.cpp b/clang/test/CodeGenSYCL/dynamic_local_accessor.cpp new file mode 100644 index 0000000000000..05d1896223ac2 --- /dev/null +++ b/clang/test/CodeGenSYCL/dynamic_local_accessor.cpp @@ -0,0 +1,40 @@ +// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm %s -o %t.ll +// RUN: FileCheck < %t.ll %s --check-prefix CHECK-IR +// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -fsycl-int-header=%t.h %s +// RUN: FileCheck < %t.h %s --check-prefix CHECK-INT-HEADER +// +// Tests for dynamic_local_accessor kernel parameter using the dummy implementation in Inputs/sycl.hpp. +// The first two RUN commands verify that the init call is generated with the correct arguments in LLVM IR +// and the second two RUN commands verify the contents of the integration header produced by the frontend. +// +// CHECK-IR: define dso_local spir_kernel void @ +// CHECK-IR-SAME: ptr addrspace(3) noundef align 4 [[PTR:%[a-zA-Z0-9_]+]] +// +// CHECK-IR: [[PTR]].addr = alloca ptr addrspace(3), align 8 +// CHECK-IR: [[PTR]].addr.ascast = addrspacecast ptr [[PTR]].addr to ptr addrspace(4) +// CHECK-IR: store ptr addrspace(3) [[PTR]], ptr addrspace(4) [[PTR]].addr.ascast, align 8 +// CHECK-IR: [[PTR_LOAD:%[a-zA-Z0-9_]+]] = load ptr addrspace(3), ptr addrspace(4) [[PTR]].addr.ascast, align 8 +// +// CHECK-IR: call spir_func void @{{.*}}__init{{.*}}(ptr addrspace(4) noundef align 4 dereferenceable_or_null(24) %{{[a-zA-Z0-9_]+}}, ptr addrspace(3) noundef %{{[a-zA-Z0-9_]+}}, ptr noundef byval(%"struct.sycl::_V1::range") align 4 %agg.{{[a-zA-Z0-9_]+}}.ascast.ascast, ptr noundef byval(%"struct.sycl::_V1::range") align 4 %agg.{{[a-zA-Z0-9_]+}}.ascast.ascast, ptr noundef byval(%"struct.sycl::_V1::id") align 4 %agg.{{[a-zA-Z0-9_]+}}.ascast.ascast) #{{[0-9_]+}} +// +// CHECK-INT-HEADER: const kernel_param_desc_t kernel_signatures[] = { +// CHECK-INT-HEADER-NEXT: //--- _ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_EUlNS0_4itemILi1EEEE_ +// CHECK-INT-HEADER-NEXT: { kernel_param_kind_t::kind_dynamic_accessor, 4064, 0 }, +// CHECK-INT-HEADER-EMPTY: +// CHECK-INT-HEADER-NEXT: { kernel_param_kind_t::kind_invalid, -987654321, -987654321 }, +// CHECK-INT-HEADER-NEXT: }; + +#include "Inputs/sycl.hpp" + +int main() { + sycl::queue Q; + sycl::dynamic_local_accessor dynLocalAcc; + Q.submit([&](sycl::handler &CGH) { + sycl::range<1> ndr; + CGH.parallel_for(ndr, [=](sycl::item<1> it) { + auto localAcc = dynLocalAcc.get(); + auto* ptr = &localAcc; +}); + }); + return 0; +} diff --git a/sycl/include/sycl/accessor.hpp b/sycl/include/sycl/accessor.hpp index 06f95f8faa1d6..2917774314e1f 100644 --- a/sycl/include/sycl/accessor.hpp +++ b/sycl/include/sycl/accessor.hpp @@ -227,6 +227,10 @@ template > class accessor; +namespace ext::oneapi::experimental { +template class dynamic_local_accessor; +} + namespace detail { template @@ -2638,6 +2642,8 @@ class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(local_accessor) local_accessor private: friend class sycl::ext::intel::esimd::detail::AccessorPrivateProxy; + template + friend class ext::oneapi::experimental::dynamic_local_accessor; }; template &impl); + dynamic_parameter_base( sycl::ext::oneapi::experimental::command_graph Graph); @@ -546,8 +549,6 @@ class __SYCL_EXPORT dynamic_parameter_base { void updateAccessor(const sycl::detail::AccessorBaseHost *Acc); - void updateWorkGroupMem(size_t BufferSize); - std::shared_ptr impl; template @@ -555,44 +556,45 @@ class __SYCL_EXPORT dynamic_parameter_base { sycl::detail::getSyclObjImpl(const Obj &SyclObject); }; -class dynamic_work_group_memory_base -#ifndef __SYCL_DEVICE_ONLY__ - : public dynamic_parameter_base -#endif -{ +class __SYCL_EXPORT dynamic_work_group_memory_base + : public dynamic_parameter_base { + public: dynamic_work_group_memory_base() = default; -#ifndef __SYCL_DEVICE_ONLY__ -#ifdef __INTEL_PREVIEW_BREAKING_CHANGES - dynamic_work_group_memory_base(size_t Size) - : dynamic_parameter_base(), BufferSize(Size) {} +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES + dynamic_work_group_memory_base(size_t BufferSizeInBytes); #endif // TODO: Remove in next ABI breaking window dynamic_work_group_memory_base( - experimental::command_graph Graph, size_t Size) - : dynamic_parameter_base(Graph), BufferSize(Size) {} -#else - dynamic_work_group_memory_base(size_t Size) : BufferSize(Size) {} - dynamic_work_group_memory_base( - experimental::command_graph /*Graph*/, - size_t Size) - : BufferSize(Size) {} -#endif + experimental::command_graph Graph, + size_t BufferSizeInBytes); -private: -#ifdef __SYCL_DEVICE_ONLY__ - [[maybe_unused]] unsigned char Padding[sizeof(dynamic_parameter_base)]; -#endif - size_t BufferSize{}; - friend class sycl::handler; +protected: + void updateWorkGroupMem(size_t NewBufferSizeInBytes); }; + +class __SYCL_EXPORT dynamic_local_accessor_base + : public dynamic_parameter_base { +public: + dynamic_local_accessor_base() = default; + + dynamic_local_accessor_base(sycl::range<3> AllocationSize, int Dims, + int ElemSize, const property_list &PropList); + +protected: + void updateLocalAccessor(sycl::range<3> NewAllocationSize); +}; + } // namespace detail template class __SYCL_SPECIAL_CLASS __SYCL_TYPE(dynamic_work_group_memory) dynamic_work_group_memory - : public detail::dynamic_work_group_memory_base { +#ifndef __SYCL_DEVICE_ONLY__ + : public detail::dynamic_work_group_memory_base +#endif +{ public: // Check that DataT is an unbounded array type. static_assert(std::is_array_v && std::extent_v == 0); @@ -607,11 +609,15 @@ __SYCL_TYPE(dynamic_work_group_memory) dynamic_work_group_memory dynamic_work_group_memory() = default; #ifdef __INTEL_PREVIEW_BREAKING_CHANGES +#ifndef __SYCL_DEVICE_ONLY__ /// Constructs a new dynamic_work_group_memory object. /// @param Num Number of elements in the unbounded array DataT. dynamic_work_group_memory(size_t Num) : detail::dynamic_work_group_memory_base( Num * sizeof(std::remove_extent_t)) {} +#else + dynamic_work_group_memory(size_t /*Num*/) {} +#endif #endif #ifndef __INTEL_PREVIEW_BREAKING_CHANGES @@ -619,6 +625,8 @@ __SYCL_TYPE(dynamic_work_group_memory) dynamic_work_group_memory "object have been deprecated " "and will be removed in the next ABI breaking window.") #endif + +#ifndef __SYCL_DEVICE_ONLY__ /// Constructs a new dynamic_work_group_memory object. /// @param Graph The graph associated with this object. /// @param Num Number of elements in the unbounded array DataT. @@ -627,6 +635,12 @@ __SYCL_TYPE(dynamic_work_group_memory) dynamic_work_group_memory : detail::dynamic_work_group_memory_base( Graph, Num * sizeof(std::remove_extent_t)) {} +#else + dynamic_work_group_memory(experimental::command_graph + /* Graph */, + size_t /* Num */) {} +#endif + work_group_memory get() const { #ifndef __SYCL_DEVICE_ONLY__ throw sycl::exception(sycl::make_error_code(errc::invalid), @@ -641,8 +655,7 @@ __SYCL_TYPE(dynamic_work_group_memory) dynamic_work_group_memory /// @param Num The new number of elements in the unbounded array. void update([[maybe_unused]] size_t Num) { #ifndef __SYCL_DEVICE_ONLY__ - detail::dynamic_parameter_base::updateWorkGroupMem( - Num * sizeof(std::remove_extent_t)); + updateWorkGroupMem(Num * sizeof(std::remove_extent_t)); #endif } @@ -655,6 +668,78 @@ __SYCL_TYPE(dynamic_work_group_memory) dynamic_work_group_memory value_type, access::address_space::local_space>::type *; void __init(decoratedPtr Ptr) { this->WorkGroupMem.__init(Ptr); } + + [[maybe_unused]] unsigned char + Padding[sizeof(detail::dynamic_work_group_memory_base)]; +#endif +}; + +template +class __SYCL_SPECIAL_CLASS +__SYCL_TYPE(dynamic_local_accessor) dynamic_local_accessor +#ifndef __SYCL_DEVICE_ONLY__ + : public detail::dynamic_local_accessor_base +#endif +{ +public: + static_assert(Dimensions > 0 && Dimensions <= 3); + + // Frontend requires special types to have a default constructor in order to + // have a uniform way of initializing an object of special type to then call + // the __init method on it. This is purely an implementation detail and not + // part of the spec. + // TODO: Revisit this once https://github.com/intel/llvm/issues/16061 is + // closed. + dynamic_local_accessor() = default; + +#ifndef __SYCL_DEVICE_ONLY__ + /// Constructs a new dynamic_local_accessor object. + /// @param Graph The graph associated with this object. + /// @param AllocationSize The size of the local accessor. + /// @param PropList List of properties for the underlying accessor. + dynamic_local_accessor( + experimental::command_graph /* Graph */, + range AllocationSize, const property_list &PropList = {}) + : detail::dynamic_local_accessor_base( + detail::convertToArrayOfN<3, 1>(AllocationSize), Dimensions, + sizeof(DataT), PropList) {} +#else + dynamic_local_accessor(experimental::command_graph + /* Graph */, + range /* AllocationSize */, + const property_list & /*PropList */ = {}) {} +#endif + + local_accessor get() const { +#ifndef __SYCL_DEVICE_ONLY__ + throw sycl::exception(sycl::make_error_code(errc::invalid), + "Error: dynamic_local_accessor::get() can be only " + "called on the device!"); +#endif + return LocalAccessor; + } + + /// Updates on the host this dynamic_local_accessor and all registered + /// nodes with a new size. + /// @param Num The new number of elements in the unbounded array. + void update([[maybe_unused]] range NewAllocationSize) { +#ifndef __SYCL_DEVICE_ONLY__ + updateLocalAccessor(detail::convertToArrayOfN<3, 1>(NewAllocationSize)); +#endif + } + +private: + local_accessor LocalAccessor; + +#ifdef __SYCL_DEVICE_ONLY__ + void __init(typename local_accessor::ConcreteASPtrType Ptr, + range AccessRange, range range, + id id) { + this->LocalAccessor.__init(Ptr, AccessRange, range, id); + } + + [[maybe_unused]] unsigned char + Padding[sizeof(detail::dynamic_local_accessor_base)]; #endif }; diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index dfb14d62a20bf..566a455d32313 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -161,9 +161,11 @@ __SYCL_EXPORT void *async_malloc_from_pool(sycl::handler &h, size_t size, } // namespace ext::oneapi::experimental namespace ext::oneapi::experimental::detail { -class graph_impl; class dynamic_parameter_base; class dynamic_work_group_memory_base; +class dynamic_local_accessor_base; +class graph_impl; +class dynamic_parameter_impl; } // namespace ext::oneapi::experimental::detail namespace detail { @@ -707,16 +709,37 @@ class __SYCL_EXPORT handler { *static_cast(detail::getValueFromDynamicParameter(DynamicParam)); // Set the arg in the handler as normal setArgHelper(ArgIndex, std::move(ArgValue)); + // Register the dynamic parameter with the handler for later association // with the node being added registerDynamicParameter(DynamicParam, ArgIndex); } - // setArgHelper for graph dynamic_work_group_memory void setArgHelper(int ArgIndex, ext::oneapi::experimental::detail::dynamic_work_group_memory_base - &DynWorkGroupBase); + &DynWorkGroupBase) { + + ext::oneapi::experimental::detail::dynamic_parameter_impl *DynParamImpl = + detail::getSyclObjImpl(DynWorkGroupBase).get(); + + addArg(detail::kernel_param_kind_t::kind_dynamic_work_group_memory, + DynParamImpl, 0, ArgIndex); + registerDynamicParameter(DynParamImpl, ArgIndex); + } + + void + setArgHelper(int ArgIndex, + ext::oneapi::experimental::detail::dynamic_local_accessor_base + &DynLocalAccessorBase) { + + ext::oneapi::experimental::detail::dynamic_parameter_impl *DynParamImpl = + detail::getSyclObjImpl(DynLocalAccessorBase).get(); + + addArg(detail::kernel_param_kind_t::kind_dynamic_accessor, DynParamImpl, 0, + ArgIndex); + registerDynamicParameter(DynParamImpl, ArgIndex); + } // setArgHelper for the raw_kernel_arg extension type. void setArgHelper(int ArgIndex, @@ -726,6 +749,8 @@ class __SYCL_EXPORT handler { Arg.MArgSize, ArgIndex); } +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + // TODO: Remove in the next ABI-breaking window. /// Registers a dynamic parameter with the handler for later association with /// the node being created /// @param DynamicParamBase @@ -734,6 +759,16 @@ class __SYCL_EXPORT handler { ext::oneapi::experimental::detail::dynamic_parameter_base &DynamicParamBase, int ArgIndex); +#endif + + /// Registers a dynamic parameter with the handler for later association with + /// the node being created + /// @param DynamicParamImpl + /// @param ArgIndex + void registerDynamicParameter( + ext::oneapi::experimental::detail::dynamic_parameter_impl + *DynamicParamImpl, + int ArgIndex); /// Verifies the kernel bundle to be used if any is set. This throws a /// sycl::exception with error code errc::kernel_not_supported if the used @@ -1812,13 +1847,29 @@ class __SYCL_EXPORT handler { // set_arg for graph dynamic_work_group_memory template - void set_arg( - int argIndex, - ext::oneapi::experimental::dynamic_work_group_memory - &dynWorkGroupMem) { + void + set_arg([[maybe_unused]] int argIndex, + [[maybe_unused]] ext::oneapi::experimental::dynamic_work_group_memory< + DataT, PropertyListT> &DynWorkGroupMem) { + +#ifndef __SYCL_DEVICE_ONLY__ ext::oneapi::experimental::detail::dynamic_work_group_memory_base - &dynWorkGroupBase = dynWorkGroupMem; - setArgHelper(argIndex, dynWorkGroupBase); + &DynWorkGroupBase = DynWorkGroupMem; + setArgHelper(argIndex, DynWorkGroupBase); +#endif + } + + // set_arg for graph dynamic_local_accessor + template + void + set_arg([[maybe_unused]] int argIndex, + [[maybe_unused]] ext::oneapi::experimental::dynamic_local_accessor< + DataT, Dimensions> &DynLocalAccessor) { +#ifndef __SYCL_DEVICE_ONLY__ + ext::oneapi::experimental::detail::dynamic_local_accessor_base + &DynLocalAccessorBase = DynLocalAccessor; + setArgHelper(argIndex, DynLocalAccessorBase); +#endif } // set_arg for the raw_kernel_arg extension type. diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index 931816551d95e..39192d0a0aa02 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -2006,6 +2006,10 @@ dynamic_parameter_base::dynamic_parameter_base() : impl(std::make_shared()) {} #endif +dynamic_parameter_base::dynamic_parameter_base( + const std::shared_ptr &impl) + : impl(impl) {} + dynamic_parameter_base::dynamic_parameter_base( command_graph) : impl(std::make_shared()) {} @@ -2027,8 +2031,37 @@ void dynamic_parameter_base::updateAccessor( impl->updateAccessor(Acc); } -void dynamic_parameter_base::updateWorkGroupMem(size_t BufferSize) { - impl->updateWorkGroupMem(BufferSize); +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES +dynamic_work_group_memory_base::dynamic_work_group_memory_base( + size_t BufferSizeInBytes) + : dynamic_parameter_base( + std::make_shared(BufferSizeInBytes)) { +} +#endif + +dynamic_work_group_memory_base::dynamic_work_group_memory_base( + experimental::command_graph /* Graph */, + size_t BufferSizeInBytes) + : dynamic_parameter_base( + std::make_shared(BufferSizeInBytes)) { +} + +void dynamic_work_group_memory_base::updateWorkGroupMem( + size_t NewBufferSizeInBytes) { + static_cast(impl.get()) + ->updateWorkGroupMem(NewBufferSizeInBytes); +} + +dynamic_local_accessor_base::dynamic_local_accessor_base( + sycl::range<3> AllocationSize, int Dims, int ElemSize, + const property_list &PropList) + : dynamic_parameter_base(std::make_shared( + AllocationSize, Dims, ElemSize, PropList)) {} + +void dynamic_local_accessor_base::updateLocalAccessor( + sycl::range<3> NewAllocationSize) { + static_cast(impl.get()) + ->updateLocalAccessor(NewAllocationSize); } void dynamic_parameter_impl::updateValue(const raw_kernel_arg *NewRawValue, @@ -2086,39 +2119,6 @@ void dynamic_parameter_impl::updateAccessor( sizeof(sycl::detail::AccessorBaseHost)); } -void dynamic_parameter_impl::updateWorkGroupMem(size_t BufferSize) { - for (auto &[NodeWeak, ArgIndex] : MNodes) { - auto NodeShared = NodeWeak.lock(); - if (NodeShared) { - dynamic_parameter_impl::updateCGWorkGroupMem(NodeShared->MCommandGroup, - ArgIndex, BufferSize); - } - } - - for (auto &DynCGInfo : MDynCGs) { - auto DynCG = DynCGInfo.DynCG.lock(); - if (DynCG) { - auto &CG = DynCG->MCommandGroups[DynCGInfo.CGIndex]; - dynamic_parameter_impl::updateCGWorkGroupMem(CG, DynCGInfo.ArgIndex, - BufferSize); - } - } -} - -void dynamic_parameter_impl::updateCGWorkGroupMem( - std::shared_ptr CG, int ArgIndex, size_t BufferSize) { - - auto &Args = static_cast(CG.get())->MArgs; - for (auto &Arg : Args) { - if (Arg.MIndex != ArgIndex) { - continue; - } - assert(Arg.MType == sycl::detail::kernel_param_kind_t::kind_std_layout); - Arg.MSize = BufferSize; - break; - } -} - void dynamic_parameter_impl::updateCGArgValue( std::shared_ptr CG, int ArgIndex, const void *NewValue, size_t Size) { @@ -2184,6 +2184,90 @@ void dynamic_parameter_impl::updateCGAccessor( } } +void dynamic_work_group_memory_impl::updateWorkGroupMem( + size_t NewBufferSizeInBytes) { + for (auto &[NodeWeak, ArgIndex] : MNodes) { + auto NodeShared = NodeWeak.lock(); + if (NodeShared) { + dynamic_work_group_memory_impl::updateCGWorkGroupMem( + NodeShared->MCommandGroup, ArgIndex, NewBufferSizeInBytes); + } + } + + for (auto &DynCGInfo : MDynCGs) { + auto DynCG = DynCGInfo.DynCG.lock(); + if (DynCG) { + auto &CG = DynCG->MCommandGroups[DynCGInfo.CGIndex]; + dynamic_work_group_memory_impl::updateCGWorkGroupMem( + CG, DynCGInfo.ArgIndex, NewBufferSizeInBytes); + } + } +} + +void dynamic_work_group_memory_impl::updateCGWorkGroupMem( + std::shared_ptr CG, int ArgIndex, + size_t NewBufferSizeInBytes) { + + auto &Args = static_cast(CG.get())->MArgs; + for (auto &Arg : Args) { + if (Arg.MIndex != ArgIndex) { + continue; + } + assert(Arg.MType == sycl::detail::kernel_param_kind_t::kind_std_layout); + Arg.MSize = NewBufferSizeInBytes; + break; + } +} + +dynamic_local_accessor_impl::dynamic_local_accessor_impl( + sycl::range<3> AllocationSize, int Dims, int ElemSize, + const property_list &PropList) + : dynamic_parameter_impl(), + LAccImplHost(AllocationSize, Dims, ElemSize, {}) { + checkGraphPropertiesAndThrow(PropList); +} + +void dynamic_local_accessor_impl::updateLocalAccessor( + range<3> NewAllocationSize) { + for (auto &[NodeWeak, ArgIndex] : MNodes) { + auto NodeShared = NodeWeak.lock(); + if (NodeShared) { + dynamic_local_accessor_impl::updateCGLocalAccessor( + NodeShared->MCommandGroup, ArgIndex, NewAllocationSize); + } + } + + for (auto &DynCGInfo : MDynCGs) { + auto DynCG = DynCGInfo.DynCG.lock(); + if (DynCG) { + auto &CG = DynCG->MCommandGroups[DynCGInfo.CGIndex]; + dynamic_local_accessor_impl::updateCGLocalAccessor(CG, DynCGInfo.ArgIndex, + NewAllocationSize); + } + } +} + +void dynamic_local_accessor_impl::updateCGLocalAccessor( + std::shared_ptr CG, int ArgIndex, + range<3> NewAllocationSize) { + + auto &Args = static_cast(CG.get())->MArgs; + for (auto &Arg : Args) { + if (Arg.MIndex != ArgIndex) { + continue; + } + assert(Arg.MType == sycl::detail::kernel_param_kind_t::kind_std_layout); + + // Update the local memory Size Argument + Arg.MSize = NewAllocationSize.size() * LAccImplHost.MElemSize; + + // MSize is used as an argument to the AccField kernel parameters. + LAccImplHost.MSize = NewAllocationSize; + + break; + } +} + dynamic_command_group_impl::dynamic_command_group_impl( const command_graph &Graph) : MGraph{sycl::detail::getSyclObjImpl(Graph)}, MActiveCGF(0), diff --git a/sycl/source/detail/graph_impl.hpp b/sycl/source/detail/graph_impl.hpp index 1600b76f7b991..d07de520af7f2 100644 --- a/sycl/source/detail/graph_impl.hpp +++ b/sycl/source/detail/graph_impl.hpp @@ -1618,22 +1618,6 @@ class dynamic_parameter_impl { /// @param Acc The new accessor value void updateAccessor(const sycl::detail::AccessorBaseHost *Acc); - /// Update the internal value of this dynamic parameter as well as the value - /// of this parameter in all registered nodes and dynamic CGs. Should only be - /// called for dynamic_work_group_memory arguments parameter. - /// @param BufferSize The total size in bytes of the new work_group_memory - /// array - void updateWorkGroupMem(size_t BufferSize); - - /// Static helper function for updating command-group - /// dynamic_work_group_memory arguments. - /// @param CG The command-group to update the argument information for. - /// @param ArgIndex The argument index to update. - /// @param BufferSize The total size in bytes of the new work_group_memory - /// array - static void updateCGWorkGroupMem(std::shared_ptr CG, - int ArgIndex, size_t BufferSize); - /// Static helper function for updating command-group value arguments. /// @param CG The command-group to update the argument information for. /// @param ArgIndex The argument index to update. @@ -1664,6 +1648,58 @@ class dynamic_parameter_impl { inline static std::atomic NextAvailableID = 0; }; +class dynamic_work_group_memory_impl : public dynamic_parameter_impl { + +public: + dynamic_work_group_memory_impl(size_t BufferSizeInBytes) + : BufferSizeInBytes(BufferSizeInBytes) {} + + virtual ~dynamic_work_group_memory_impl() = default; + + /// Update the internal value of this dynamic parameter as well as the value + /// of this parameter in all registered nodes and dynamic CGs. + /// @param NewBufferSizeInBytes The total size in bytes of the new + /// work_group_memory array. + void updateWorkGroupMem(size_t NewBufferSizeInBytes); + + /// Static helper function for updating command-group + /// dynamic_work_group_memory arguments. + /// @param CG The command-group to update the argument information for. + /// @param ArgIndex The argument index to update. + /// @param NewBufferSizeInBytes The total size in bytes of the new + /// work_group_memory array. + void updateCGWorkGroupMem(std::shared_ptr CG, int ArgIndex, + size_t NewBufferSizeInBytes); + + size_t BufferSizeInBytes; +}; + +class dynamic_local_accessor_impl : public dynamic_parameter_impl { + +public: + dynamic_local_accessor_impl(sycl::range<3> AllocationSize, int Dims, + int ElemSize, const property_list &PropList); + + virtual ~dynamic_local_accessor_impl() = default; + + /// Update the internal value of this dynamic parameter as well as the value + /// of this parameter in all registered nodes and dynamic CGs. + /// @param NewAllocationSize The new allocation size for the + /// dynamic_local_accessor. + void updateLocalAccessor(range<3> NewAllocationSize); + + /// Static helper function for updating command-group dynamic_local_accessor + /// arguments. + /// @param CG The command-group to update the argument information for. + /// @param ArgIndex The argument index to update. + /// @param NewAllocationSize The new allocation size for the + /// dynamic_local_accessor. + void updateCGLocalAccessor(std::shared_ptr CG, int ArgIndex, + range<3> NewAllocationSize); + + detail::LocalAccessorImplHost LAccImplHost; +}; + class dynamic_command_group_impl : public std::enable_shared_from_this { public: diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 783ce3b1412bb..7a6b110b0b853 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2309,6 +2309,7 @@ void SetArgBasedOnType( break; case kernel_param_kind_t::kind_stream: break; + case kernel_param_kind_t::kind_dynamic_accessor: case kernel_param_kind_t::kind_accessor: { Requirement *Req = (Requirement *)(Arg.MPtr); diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 1eb05ee858568..1c1406b6d6f2f 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -432,7 +432,7 @@ event handler::finalize() { // Check associated accessors bool AccFound = false; for (detail::ArgDesc &Acc : impl->MAssociatedAccesors) { - if (Acc.MType == detail::kernel_param_kind_t::kind_accessor && + if ((Acc.MType == detail::kernel_param_kind_t::kind_accessor) && static_cast(Acc.MPtr) == AccImpl) { AccFound = true; break; @@ -938,6 +938,41 @@ static void addArgsForGlobalAccessor(detail::Requirement *AccImpl, size_t Index, } } +static void addArgsForLocalAccessor(detail::LocalAccessorImplHost *LAcc, + size_t Index, size_t &IndexShift, + bool IsKernelCreatedFromSource, + std::vector &Args, + bool IsESIMD) { + using detail::kernel_param_kind_t; + + range<3> &LAccSize = LAcc->MSize; + const int Dims = LAcc->MDims; + int SizeInBytes = LAcc->MElemSize; + for (int I = 0; I < Dims; ++I) + SizeInBytes *= LAccSize[I]; + + // Some backends do not accept zero-sized local memory arguments, so we + // make it a minimum allocation of 1 byte. + SizeInBytes = std::max(SizeInBytes, 1); + Args.emplace_back(kernel_param_kind_t::kind_std_layout, nullptr, SizeInBytes, + Index + IndexShift); + // TODO ESIMD currently does not suport MSize field passing yet + // accessor::init for ESIMD-mode accessor has a single field, translated + // to a single kernel argument set above. + if (!IsESIMD && !IsKernelCreatedFromSource) { + ++IndexShift; + const size_t SizeAccField = (Dims == 0 ? 1 : Dims) * sizeof(LAccSize[0]); + Args.emplace_back(kernel_param_kind_t::kind_std_layout, &LAccSize, + SizeAccField, Index + IndexShift); + ++IndexShift; + Args.emplace_back(kernel_param_kind_t::kind_std_layout, &LAccSize, + SizeAccField, Index + IndexShift); + ++IndexShift; + Args.emplace_back(kernel_param_kind_t::kind_std_layout, &LAccSize, + SizeAccField, Index + IndexShift); + } +} + void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, const int Size, const size_t Index, size_t &IndexShift, bool IsKernelCreatedFromSource, bool IsESIMD) { @@ -1008,34 +1043,11 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, break; } case access::target::local: { - detail::LocalAccessorImplHost *LAcc = + detail::LocalAccessorImplHost *LAccImpl = static_cast(Ptr); - range<3> &Size = LAcc->MSize; - const int Dims = LAcc->MDims; - int SizeInBytes = LAcc->MElemSize; - for (int I = 0; I < Dims; ++I) - SizeInBytes *= Size[I]; - // Some backends do not accept zero-sized local memory arguments, so we - // make it a minimum allocation of 1 byte. - SizeInBytes = std::max(SizeInBytes, 1); - impl->MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, nullptr, - SizeInBytes, Index + IndexShift); - // TODO ESIMD currently does not suport MSize field passing yet - // accessor::init for ESIMD-mode accessor has a single field, translated - // to a single kernel argument set above. - if (!IsESIMD && !IsKernelCreatedFromSource) { - ++IndexShift; - const size_t SizeAccField = (Dims == 0 ? 1 : Dims) * sizeof(Size[0]); - addArg(kernel_param_kind_t::kind_std_layout, &Size, SizeAccField, - Index + IndexShift); - ++IndexShift; - addArg(kernel_param_kind_t::kind_std_layout, &Size, SizeAccField, - Index + IndexShift); - ++IndexShift; - addArg(kernel_param_kind_t::kind_std_layout, &Size, SizeAccField, - Index + IndexShift); - } + addArgsForLocalAccessor(LAccImpl, Index, IndexShift, + IsKernelCreatedFromSource, impl->MArgs, IsESIMD); break; } case access::target::image: @@ -1058,19 +1070,45 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, } break; } + case kernel_param_kind_t::kind_dynamic_accessor: { + const access::target AccTarget = + static_cast(Size & AccessTargetMask); + switch (AccTarget) { + case access::target::local: { + + auto *DynParamImpl = static_cast< + ext::oneapi::experimental::detail::dynamic_parameter_impl *>(Ptr); + + registerDynamicParameter(DynParamImpl, Index + IndexShift); + + auto *DynLocalAccessorImpl = static_cast< + ext::oneapi::experimental::detail::dynamic_local_accessor_impl *>( + DynParamImpl); + + addArgsForLocalAccessor(&DynLocalAccessorImpl->LAccImplHost, Index, + IndexShift, IsKernelCreatedFromSource, + impl->MArgs, IsESIMD); + break; + } + default: { + assert(false && "Unsupported dynamic accessor target"); + } + } + break; + } case kernel_param_kind_t::kind_dynamic_work_group_memory: { - auto *DynBase = static_cast< - ext::oneapi::experimental::detail::dynamic_parameter_base *>(Ptr); + auto *DynParamImpl = static_cast< + ext::oneapi::experimental::detail::dynamic_parameter_impl *>(Ptr); - auto *DynWorkGroupBase = static_cast< - ext::oneapi::experimental::detail::dynamic_work_group_memory_base *>( - Ptr); + registerDynamicParameter(DynParamImpl, Index + IndexShift); - registerDynamicParameter(*DynBase, Index + IndexShift); + auto *DynWorkGroupImpl = static_cast< + ext::oneapi::experimental::detail::dynamic_work_group_memory_impl *>( + DynParamImpl); addArg(kernel_param_kind_t::kind_std_layout, nullptr, - DynWorkGroupBase->BufferSize, Index + IndexShift); + DynWorkGroupImpl->BufferSizeInBytes, Index + IndexShift); break; } case kernel_param_kind_t::kind_work_group_memory: { @@ -1109,19 +1147,6 @@ void handler::setArgHelper(int ArgIndex, stream &&Str) { ArgIndex); } -void handler::setArgHelper( - int ArgIndex, - ext::oneapi::experimental::detail::dynamic_work_group_memory_base - &DynWorkGroupBase) { - - addArg(detail::kernel_param_kind_t::kind_dynamic_work_group_memory, - &DynWorkGroupBase, 0, ArgIndex); - - // Register the dynamic parameter with the handler for later association - // with the node being added - registerDynamicParameter(DynWorkGroupBase, ArgIndex); -} - // The argument can take up more space to store additional information about // MAccessRange, MMemoryRange, and MOffset added with addArgsForGlobalAccessor. // We use the worst-case estimate because the lifetime of the vector is short. @@ -1185,7 +1210,26 @@ void handler::extractArgsAndReqsFromLambda( static_cast(Ptr); Ptr = detail::getSyclObjImpl(*LocalAccBase).get(); } + } else if (Kind == detail::kernel_param_kind_t::kind_dynamic_accessor) { + // For args kind of accessor Size is information about accessor. + // The first 11 bits of Size encodes the accessor target. + const access::target AccTarget = + static_cast(Size & AccessTargetMask); + // Only local targets are supported for dynamic accessors. + assert(AccTarget == access::target::local); + + ext::oneapi::experimental::detail::dynamic_parameter_base + *DynamicParamBase = static_cast< + ext::oneapi::experimental::detail::dynamic_parameter_base *>(Ptr); + Ptr = detail::getSyclObjImpl(*DynamicParamBase).get(); + } else if (Kind == + detail::kernel_param_kind_t::kind_dynamic_work_group_memory) { + ext::oneapi::experimental::detail::dynamic_parameter_base + *DynamicParamBase = static_cast< + ext::oneapi::experimental::detail::dynamic_parameter_base *>(Ptr); + Ptr = detail::getSyclObjImpl(*DynamicParamBase).get(); } + processArg(Ptr, Kind, Size, I, IndexShift, /*IsKernelCreatedFromSource=*/false, IsESIMD); } @@ -2168,8 +2212,9 @@ void handler::setNDRangeUsed(bool Value) { (void)Value; } #endif void handler::registerDynamicParameter( - ext::oneapi::experimental::detail::dynamic_parameter_base &DynamicParamBase, + ext::oneapi::experimental::detail::dynamic_parameter_impl *DynamicParamImpl, int ArgIndex) { + if (MQueue && MQueue->hasCommandGraph()) { throw sycl::exception( make_error_code(errc::invalid), @@ -2181,10 +2226,21 @@ void handler::registerDynamicParameter( "Dynamic Parameters cannot be used with normal SYCL submissions"); } - auto Paraimpl = detail::getSyclObjImpl(DynamicParamBase); - impl->MDynamicParameters.emplace_back(Paraimpl.get(), ArgIndex); + impl->MDynamicParameters.emplace_back(DynamicParamImpl, ArgIndex); } +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES +// TODO: Remove in the next ABI-breaking window. +void handler::registerDynamicParameter( + ext::oneapi::experimental::detail::dynamic_parameter_base &DynamicParamBase, + int ArgIndex) { + ext::oneapi::experimental::detail::dynamic_parameter_impl *DynParamImpl = + detail::getSyclObjImpl(DynamicParamBase).get(); + + registerDynamicParameter(DynParamImpl, ArgIndex); +} +#endif + bool handler::eventNeeded() const { return impl->MEventNeeded; } void *handler::storeRawArg(const void *Ptr, size_t Size) { diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_with_dyn_local_accessor.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_with_dyn_local_accessor.cpp new file mode 100644 index 0000000000000..96cbceffee596 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_with_dyn_local_accessor.cpp @@ -0,0 +1,92 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// Tests using a dynamic command-group object with dynamic_local_accessor. + +#include "../graph_common.hpp" +#include + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + constexpr int LocalSizeA{16}; + constexpr int LocalSizeB{64}; + + int *PtrA = malloc_device(Size, Queue); + int *PtrB = malloc_device(Size, Queue); + + std::vector HostDataA(Size); + std::vector HostDataB(Size); + + exp_ext::dynamic_local_accessor DynLocalMem(Graph, LocalSizeA); + + nd_range<1> NDrangeA{Size, LocalSizeA}; + auto CGFA = [&](handler &CGH) { + CGH.parallel_for(NDrangeA, [=](nd_item<1> Item) { + size_t GlobalID = Item.get_global_id(); + auto LocalRange = Item.get_local_range(0); + auto LocalMem = DynLocalMem.get(); + + LocalMem[Item.get_local_id()] = LocalRange; + group_barrier(Item.get_group()); + + for (size_t i{0}; i < LocalRange; ++i) { + PtrA[GlobalID] += LocalMem[i]; + } + }); + }; + + nd_range<1> NDrangeB{Size, LocalSizeB}; + auto CGFB = [&](handler &CGH) { + CGH.parallel_for(NDrangeB, [=](nd_item<1> Item) { + size_t GlobalID = Item.get_global_id(); + auto LocalRange = Item.get_local_range(0); + auto LocalMem = DynLocalMem.get(); + + LocalMem[Item.get_local_id()] = LocalRange; + group_barrier(Item.get_group()); + + for (size_t i{0}; i < LocalRange; ++i) { + PtrB[GlobalID] += LocalMem[i]; + } + }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB}); + auto DynamicCGNode = Graph.add(DynamicCG); + + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + auto ExecuteGraphAndVerifyResults = [&](bool A, bool B, const int LocalSize) { + Queue.memset(PtrA, 0, Size * sizeof(int)); + Queue.memset(PtrB, 0, Size * sizeof(int)); + Queue.wait(); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + + Queue.copy(PtrA, HostDataA.data(), Size); + Queue.copy(PtrB, HostDataB.data(), Size); + Queue.wait(); + + for (size_t i = 0; i < Size; i++) { + assert(HostDataA[i] == (A ? LocalSize * LocalSize : 0)); + assert(HostDataB[i] == (B ? LocalSize * LocalSize : 0)); + } + }; + ExecuteGraphAndVerifyResults(true, false, LocalSizeA); + + DynamicCG.set_active_index(1); + DynLocalMem.update(LocalSizeB); + ExecGraph.update(DynamicCGNode); + ExecuteGraphAndVerifyResults(false, true, LocalSizeB); + + sycl::free(PtrA, Queue); + sycl::free(PtrB, Queue); + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_local_accessor_basic.cpp b/sycl/test-e2e/Graph/Update/dyn_local_accessor_basic.cpp new file mode 100644 index 0000000000000..8ec857b40fc13 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_local_accessor_basic.cpp @@ -0,0 +1,71 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// Tests updating dynamic_local_accessor with a new size. + +#include "../graph_common.hpp" +#include + +int main() { + queue Queue{}; + constexpr int LocalSize{16}; + + using T = int; + + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + int *PtrA = malloc_device(Size, Queue); + std::vector HostDataA(Size); + + exp_ext::dynamic_local_accessor DynLocalMem{Graph, LocalSize}; + + Queue.memset(PtrA, 0, Size * sizeof(T)).wait(); + + nd_range<1> NDRange{range{Size}, range{LocalSize}}; + + auto KernelNode = Graph.add([&](handler &cgh) { + cgh.parallel_for(NDRange, [=](nd_item<1> Item) { + size_t GlobalID = Item.get_global_id(); + auto LocalRange = Item.get_local_range(0); + auto LocalMem = DynLocalMem.get(); + + LocalMem[Item.get_local_id()] = LocalRange; + group_barrier(Item.get_group()); + + for (size_t i{0}; i < LocalRange; ++i) { + PtrA[GlobalID] += LocalMem[i]; + } + }); + }); + + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + + Queue.copy(PtrA, HostDataA.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostDataA[i] == LocalSize * LocalSize); + } + + Queue.memset(PtrA, 0, Size * sizeof(T)).wait(); + + constexpr int NewLocalSize{32}; + + DynLocalMem.update(NewLocalSize); + KernelNode.update_nd_range(nd_range<1>{range{Size}, range{NewLocalSize}}); + ExecGraph.update(KernelNode); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + + Queue.copy(PtrA, HostDataA.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostDataA[i] == NewLocalSize * NewLocalSize); + } + + free(PtrA, Queue); + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_local_accessor_multiple.cpp b/sycl/test-e2e/Graph/Update/dyn_local_accessor_multiple.cpp new file mode 100644 index 0000000000000..b8027e710a0f2 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_local_accessor_multiple.cpp @@ -0,0 +1,77 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// Tests using more than one dynamic_local_accessor object in the same node. + +#include "../graph_common.hpp" +#include + +int main() { + queue Queue{}; + + constexpr int LocalSize{16}; + + using T = int; + + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + int *PtrA = malloc_device(Size, Queue); + std::vector HostDataA(Size); + + exp_ext::dynamic_local_accessor DynLocalMemA{Graph, LocalSize}; + exp_ext::dynamic_local_accessor DynLocalMemB{Graph, LocalSize}; + + Queue.memset(PtrA, 0, Size * sizeof(T)).wait(); + + nd_range<1> NDRange{range{Size}, range{LocalSize}}; + + auto KernelNode = Graph.add([&](handler &cgh) { + cgh.parallel_for(NDRange, [=](nd_item<1> Item) { + size_t GlobalID = Item.get_global_id(); + auto LocalRange = Item.get_local_range(0); + + auto LocalMemA = DynLocalMemA.get(); + auto LocalMemB = DynLocalMemB.get(); + + LocalMemA[Item.get_local_id()] = LocalRange; + LocalMemB[Item.get_local_id()] = LocalRange; + group_barrier(Item.get_group()); + + for (size_t i{0}; i < LocalRange; ++i) { + PtrA[GlobalID] += (T)(LocalMemA[i] + LocalMemB[i]); + } + }); + }); + + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + + Queue.copy(PtrA, HostDataA.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostDataA[i] == LocalSize * LocalSize * 2); + } + + Queue.memset(PtrA, 0, Size * sizeof(T)).wait(); + + constexpr int NewLocalSize{32}; + + DynLocalMemA.update(NewLocalSize); + DynLocalMemB.update(NewLocalSize); + KernelNode.update_nd_range(nd_range<1>{range{Size}, range{NewLocalSize}}); + ExecGraph.update(KernelNode); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + + Queue.copy(PtrA, HostDataA.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostDataA[i] == NewLocalSize * NewLocalSize * 2); + } + + free(PtrA, Queue); + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_local_accessor_multiple_nodes.cpp b/sycl/test-e2e/Graph/Update/dyn_local_accessor_multiple_nodes.cpp new file mode 100644 index 0000000000000..b9c8ba69f90be --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_local_accessor_multiple_nodes.cpp @@ -0,0 +1,131 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// Tests using a dynamic_local_accessor with multiple nodes. + +#include "../graph_common.hpp" +#include + +int main() { + queue Queue{}; + + constexpr int LocalSize{16}; + + using T = int; + + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + int *PtrA = malloc_device(Size * Size, Queue); + std::vector HostDataA(Size * Size); + + exp_ext::dynamic_local_accessor DynLocalMemA{ + Graph, range<2>{LocalSize, LocalSize}}; + exp_ext::dynamic_local_accessor DynLocalMemC{Graph, + range<1>{LocalSize}}; + + Queue.memset(PtrA, 0, Size * Size * sizeof(T)).wait(); + + nd_range<2> NDRange2D{range<2>{Size, Size}, range<2>{LocalSize, LocalSize}}; + + auto KernelNodeA = Graph.add([&](handler &cgh) { + cgh.parallel_for(NDRange2D, [=](nd_item<2> Item) { + size_t GlobalID = Item.get_global_linear_id(); + auto LocalRange = Item.get_local_range(0); + const auto i = Item.get_local_id()[0]; + const auto j = Item.get_local_id()[1]; + + auto LocalMemA = DynLocalMemA.get(); + + LocalMemA[i][j] = LocalRange; + group_barrier(Item.get_group()); + + for (size_t k{0}; k < LocalRange; ++k) { + for (size_t z{0}; z < LocalRange; ++z) { + PtrA[GlobalID] += (T)(LocalMemA[k][z]); + } + } + }); + }); + + auto KernelNodeB = Graph.add( + [&](handler &cgh) { + cgh.parallel_for(NDRange2D, [=](nd_item<2> Item) { + size_t GlobalID = Item.get_global_linear_id(); + auto LocalRange = Item.get_local_range(0); + const auto i = Item.get_local_id()[0]; + const auto j = Item.get_local_id()[1]; + + auto LocalMemA = DynLocalMemA.get(); + + LocalMemA[i][j] = LocalRange; + group_barrier(Item.get_group()); + + // Substracting what was added in NodeA gives 0. + for (size_t k{0}; k < LocalRange; ++k) { + for (size_t z{0}; z < LocalRange; ++z) { + PtrA[GlobalID] -= (T)(LocalMemA[k][z]); + } + } + }); + }, + exp_ext::property::node::depends_on{KernelNodeA}); + + nd_range<1> NDRange{Size * Size, LocalSize}; + auto KernelNodeC = Graph.add( + [&](handler &cgh) { + cgh.parallel_for(NDRange, [=](nd_item<1> Item) { + size_t GlobalID = Item.get_global_id(); + auto LocalRange = Item.get_local_range(0); + + auto LocalMemC = DynLocalMemC.get(); + + LocalMemC[Item.get_local_id()] = LocalRange; + group_barrier(Item.get_group()); + + for (size_t i{0}; i < LocalRange; ++i) { + PtrA[GlobalID] += (T)(LocalMemC[i]); + } + }); + }, + exp_ext::property::node::depends_on{KernelNodeB}); + + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + + Queue.copy(PtrA, HostDataA.data(), Size * Size).wait(); + for (size_t i = 0; i < Size * Size; i++) { + assert(HostDataA[i] == LocalSize * LocalSize); + } + + Queue.memset(PtrA, 0, Size * Size * sizeof(T)).wait(); + + constexpr size_t NewLocalSize{32}; + + DynLocalMemA.update(range<2>{NewLocalSize, NewLocalSize}); + DynLocalMemC.update(range<1>{NewLocalSize}); + + KernelNodeA.update_nd_range( + nd_range<2>{range<2>{Size, Size}, range<2>{NewLocalSize, NewLocalSize}}); + KernelNodeB.update_nd_range( + nd_range<2>{range<2>{Size, Size}, range<2>{NewLocalSize, NewLocalSize}}); + KernelNodeC.update_nd_range(nd_range<1>{Size * Size, NewLocalSize}); + + ExecGraph.update(KernelNodeA); + ExecGraph.update(KernelNodeB); + ExecGraph.update(KernelNodeC); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + + Queue.copy(PtrA, HostDataA.data(), Size * Size).wait(); + for (size_t i = 0; i < Size * Size; i++) { + assert(HostDataA[i] == NewLocalSize * NewLocalSize); + } + + free(PtrA, Queue); + return 0; +} \ No newline at end of file diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 6517e5ce6fb03..f0b4bc6773ae9 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3086,7 +3086,6 @@ _ZN4sycl3_V13ext6oneapi12experimental6detail19compile_from_sourceERNS0_13kernel_ _ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_base11updateValueEPKNS3_14raw_kernel_argEm _ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_base11updateValueEPKvm _ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_base14updateAccessorEPKNS0_6detail16AccessorBaseHostE -_ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_base18updateWorkGroupMemEm _ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_baseC1ENS3_13command_graphILNS3_11graph_stateE0EEE _ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_baseC1ENS3_13command_graphILNS3_11graph_stateE0EEEmPKv _ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_baseC2ENS3_13command_graphILNS3_11graph_stateE0EEE @@ -3520,7 +3519,14 @@ _ZN4sycl3_V17handler11saveCodeLocENS0_6detail13code_locationE _ZN4sycl3_V17handler11saveCodeLocENS0_6detail13code_locationEb _ZN4sycl3_V17handler11storeRawArgEPKvm _ZN4sycl3_V17handler12addReductionERKSt10shared_ptrIKvE -_ZN4sycl3_V17handler12setArgHelperEiRNS0_3ext6oneapi12experimental6detail30dynamic_work_group_memory_baseE +_ZN4sycl3_V13ext6oneapi12experimental6detail30dynamic_work_group_memory_base18updateWorkGroupMemEm +_ZN4sycl3_V13ext6oneapi12experimental6detail30dynamic_work_group_memory_baseC1ENS3_13command_graphILNS3_11graph_stateE0EEEm +_ZN4sycl3_V13ext6oneapi12experimental6detail27dynamic_local_accessor_baseC2ENS0_5rangeILi3EEEiiRKNS0_13property_listE +_ZN4sycl3_V13ext6oneapi12experimental6detail30dynamic_work_group_memory_baseC2ENS3_13command_graphILNS3_11graph_stateE0EEEm +_ZN4sycl3_V13ext6oneapi12experimental6detail27dynamic_local_accessor_baseC1ENS0_5rangeILi3EEEiiRKNS0_13property_listE +_ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_baseC1ERKSt10shared_ptrINS4_22dynamic_parameter_implEE +_ZN4sycl3_V13ext6oneapi12experimental6detail27dynamic_local_accessor_base19updateLocalAccessorENS0_5rangeILi3EEE +_ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_baseC2ERKSt10shared_ptrINS4_22dynamic_parameter_implEE _ZN4sycl3_V17handler12setArgHelperEiRNS0_6detail22work_group_memory_implE _ZN4sycl3_V17handler12setArgHelperEiONS0_6streamE _ZN4sycl3_V17handler13getKernelNameEv @@ -3569,6 +3575,7 @@ _ZN4sycl3_V17handler24GetRangeRoundingSettingsERmS2_S2_ _ZN4sycl3_V17handler24ext_intel_read_host_pipeENS0_6detail11string_viewEPvmb _ZN4sycl3_V17handler24ext_oneapi_memcpy2d_implEPvmPKvmmm _ZN4sycl3_V17handler24ext_oneapi_memset2d_implEPvmimm +_ZN4sycl3_V17handler24registerDynamicParameterEPNS0_3ext6oneapi12experimental6detail22dynamic_parameter_implEi _ZN4sycl3_V17handler24registerDynamicParameterERNS0_3ext6oneapi12experimental6detail22dynamic_parameter_baseEi _ZN4sycl3_V17handler25ext_intel_write_host_pipeENS0_6detail11string_viewEPvmb _ZN4sycl3_V17handler26associateWithHandlerCommonESt10shared_ptrINS0_6detail16AccessorImplHostEEi diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 9682f6412cce0..3a1a34edba1d3 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -4344,6 +4344,7 @@ ?reduGetMaxWGSize@detail@_V1@sycl@@YA_KV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_K@Z ?reduGetPreferredWGSize@detail@_V1@sycl@@YA_KAEAV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_K@Z ?registerDynamicParameter@handler@_V1@sycl@@AEAAXAEAVdynamic_parameter_base@detail@experimental@oneapi@ext@23@H@Z +?registerDynamicParameter@handler@_V1@sycl@@AEAAXPEAVdynamic_parameter_impl@detail@experimental@oneapi@ext@23@H@Z ?release_external_memory@experimental@oneapi@ext@_V1@sycl@@YAXUexternal_mem@12345@AEBVdevice@45@AEBVcontext@45@@Z ?release_external_memory@experimental@oneapi@ext@_V1@sycl@@YAXUexternal_mem@12345@AEBVqueue@45@@Z ?release_external_semaphore@experimental@oneapi@ext@_V1@sycl@@YAXUexternal_semaphore@12345@AEBVdevice@45@AEBVcontext@45@@Z @@ -4370,7 +4371,6 @@ ?setArgHelper@handler@_V1@sycl@@AEAAXH$$QEAVraw_kernel_arg@experimental@oneapi@ext@23@@Z ?setArgHelper@handler@_V1@sycl@@AEAAXH$$QEAVsampler@23@@Z ?setArgHelper@handler@_V1@sycl@@AEAAXH$$QEAVstream@23@@Z -?setArgHelper@handler@_V1@sycl@@AEAAXHAEAVdynamic_work_group_memory_base@detail@experimental@oneapi@ext@23@@Z ?setArgHelper@handler@_V1@sycl@@AEAAXHAEAVwork_group_memory_impl@detail@23@@Z ?setArgsHelper@handler@_V1@sycl@@AEAAXH@Z ?setArgsToAssociatedAccessors@handler@_V1@sycl@@AEAAXXZ @@ -4450,7 +4450,25 @@ ?updateAccessor@dynamic_parameter_base@detail@experimental@oneapi@ext@_V1@sycl@@IEAAXPEBVAccessorBaseHost@267@@Z ?updateValue@dynamic_parameter_base@detail@experimental@oneapi@ext@_V1@sycl@@IEAAXPEBVraw_kernel_arg@34567@_K@Z ?updateValue@dynamic_parameter_base@detail@experimental@oneapi@ext@_V1@sycl@@IEAAXPEBX_K@Z -?updateWorkGroupMem@dynamic_parameter_base@detail@experimental@oneapi@ext@_V1@sycl@@IEAAX_K@Z +??4dynamic_work_group_memory_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV0123456@AEBV0123456@@Z +??0dynamic_local_accessor_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@V?$range@$02@56@HHAEBVproperty_list@56@@Z +??0dynamic_parameter_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBV?$shared_ptr@Vdynamic_parameter_impl@detail@experimental@oneapi@ext@_V1@sycl@@@std@@@Z +??0dynamic_work_group_memory_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@$$QEAV0123456@@Z +??4dynamic_local_accessor_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV0123456@$$QEAV0123456@@Z +??4dynamic_local_accessor_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV0123456@AEBV0123456@@Z +??0dynamic_work_group_memory_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@XZ +??0dynamic_work_group_memory_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@V?$command_graph@$0A@@23456@_K@Z +?updateLocalAccessor@dynamic_local_accessor_base@detail@experimental@oneapi@ext@_V1@sycl@@IEAAXV?$range@$02@67@@Z +?setArgHelper@handler@_V1@sycl@@AEAAXHAEAVdynamic_local_accessor_base@detail@experimental@oneapi@ext@23@@Z +?setArgHelper@handler@_V1@sycl@@AEAAXHAEAVdynamic_work_group_memory_base@detail@experimental@oneapi@ext@23@@Z +??0dynamic_local_accessor_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@$$QEAV0123456@@Z +??4dynamic_work_group_memory_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV0123456@$$QEAV0123456@@Z +??0dynamic_local_accessor_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBV0123456@@Z +??1dynamic_local_accessor_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@XZ +?updateWorkGroupMem@dynamic_work_group_memory_base@detail@experimental@oneapi@ext@_V1@sycl@@IEAAX_K@Z +??0dynamic_local_accessor_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@XZ +??1dynamic_work_group_memory_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@XZ +??0dynamic_work_group_memory_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBV0123456@@Z ?use_kernel_bundle@handler@_V1@sycl@@QEAAXAEBV?$kernel_bundle@$01@23@@Z ?verifyDeviceHasProgressGuarantee@handler@_V1@sycl@@AEAAXW4forward_progress_guarantee@experimental@oneapi@ext@23@W4execution_scope@56723@1@Z ?verifyReductionProps@detail@_V1@sycl@@YAXAEBVproperty_list@23@@Z diff --git a/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp b/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp index 453e1beb72adf..dfb100acec848 100644 --- a/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp +++ b/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp @@ -689,3 +689,18 @@ TEST_F(CommandGraphTest, DynamicWorkGroupMemoryGet) { Graph, LocalSize}; ASSERT_ANY_THROW(DynLocalMem.get()); } + +// Tests that dynamic_local_accessor.get() will throw on the host side. +TEST_F(CommandGraphTest, DynamicLocalAccessorGet) { + device Dev; + context Ctx{{Dev}}; + queue Queue{Ctx, Dev}; + constexpr int LocalSize{32}; + + ext::oneapi::experimental::command_graph Graph{Queue.get_context(), + Queue.get_device()}; + + ext::oneapi::experimental::dynamic_local_accessor DynLocalMem{ + Graph, LocalSize}; + ASSERT_ANY_THROW(DynLocalMem.get()); +} diff --git a/sycl/unittests/Extensions/CommandGraph/CommonReferenceSemantics.cpp b/sycl/unittests/Extensions/CommandGraph/CommonReferenceSemantics.cpp index 9451bf1334b12..c6a9333cb02a5 100644 --- a/sycl/unittests/Extensions/CommandGraph/CommonReferenceSemantics.cpp +++ b/sycl/unittests/Extensions/CommandGraph/CommonReferenceSemantics.cpp @@ -100,6 +100,28 @@ TEST_F(CommandGraphTest, DynamicParamSemantics) { testSemantics>(Factory)); } +TEST_F(CommandGraphTest, DynamicWorkGroupMemorySemantics) { + sycl::queue Queue; + experimental::command_graph Graph(Queue.get_context(), Queue.get_device()); + + auto Factory = [&]() { + return experimental::dynamic_work_group_memory(Graph, 1); + }; + ASSERT_NO_FATAL_FAILURE( + testSemantics>(Factory)); +} + +TEST_F(CommandGraphTest, DynamicLocalAccessorSemantics) { + sycl::queue Queue; + experimental::command_graph Graph(Queue.get_context(), Queue.get_device()); + + auto Factory = [&]() { + return experimental::dynamic_local_accessor(Graph, 1); + }; + ASSERT_NO_FATAL_FAILURE( + (testSemantics>(Factory))); +} + /** * Checks for potential hash collisions in the hash implementations of graph * related classes. diff --git a/sycl/unittests/Extensions/CommandGraph/Update.cpp b/sycl/unittests/Extensions/CommandGraph/Update.cpp index db0e04c0cccaa..1f38e08eeb559 100644 --- a/sycl/unittests/Extensions/CommandGraph/Update.cpp +++ b/sycl/unittests/Extensions/CommandGraph/Update.cpp @@ -26,20 +26,30 @@ TEST_F(CommandGraphTest, UpdatableException) { EXPECT_ANY_THROW(ExecGraphNoUpdatable.update(Node)); } -TEST_F(CommandGraphTest, DynamicParamRegister) { - // Check that registering a dynamic param with a node from a graph that was +TEST_F(CommandGraphTest, DynamicObjRegister) { + // Check that registering a dynamic object with a node from a graph that was // not passed to its constructor does not throw. + + auto CheckRegisterWrongGraph = [&](auto &DynObj) { + auto OtherGraph = + experimental::command_graph(Queue.get_context(), Queue.get_device()); + auto Node = OtherGraph.add([&](sycl::handler &cgh) { + // This should not throw + EXPECT_NO_THROW(cgh.set_arg(0, DynObj)); + cgh.single_task>([]() {}); + }); + }; + // TODO: Update test when deprecated constructors that take a graph have been // removed. - experimental::dynamic_parameter DynamicParam(Graph, int{}); + experimental::dynamic_parameter DynamicParam{Graph, int{}}; + ASSERT_NO_FATAL_FAILURE(CheckRegisterWrongGraph(DynamicParam)); - auto OtherGraph = - experimental::command_graph(Queue.get_context(), Queue.get_device()); - auto Node = OtherGraph.add([&](sycl::handler &cgh) { - // This should not throw - EXPECT_NO_THROW(cgh.set_arg(0, DynamicParam)); - cgh.single_task>([]() {}); - }); + experimental::dynamic_work_group_memory DynamicWorkGroupMem{Graph, 1}; + ASSERT_NO_FATAL_FAILURE(CheckRegisterWrongGraph(DynamicWorkGroupMem)); + + experimental::dynamic_local_accessor DynamicLocalAcc{Graph, 1}; + ASSERT_NO_FATAL_FAILURE(CheckRegisterWrongGraph(DynamicLocalAcc)); } TEST_F(CommandGraphTest, UpdateNodeNotInGraph) { @@ -67,73 +77,83 @@ TEST_F(CommandGraphTest, UpdateWithUnchangedNode) { } TEST_F(CommandGraphTest, UpdateNodeTypeExceptions) { - // Check that registering a dynamic parameter with various node types either + // Check that registering a dynamic object with various node types either // throws or does not throw as appropriate - // Allocate some pointers for memory nodes - int *PtrA = malloc_device(16, Queue); - int *PtrB = malloc_device(16, Queue); + auto CheckNodeCompatibility = [&](auto &DynObj) { + // Allocate some pointers for memory nodes + int *PtrA = malloc_device(16, Queue); + int *PtrB = malloc_device(16, Queue); + + ASSERT_NO_THROW(auto NodeKernel = Graph.add([&](sycl::handler &cgh) { + cgh.set_arg(0, DynObj); + cgh.single_task>([]() {}); + })); + + ASSERT_ANY_THROW(auto NodeMemcpy = Graph.add([&](sycl::handler &cgh) { + cgh.set_arg(0, DynObj); + cgh.memcpy(PtrA, PtrB, 16 * sizeof(int)); + })); + + ASSERT_ANY_THROW(auto NodeMemset = Graph.add([&](sycl::handler &cgh) { + cgh.set_arg(0, DynObj); + cgh.memset(PtrB, 7, 16 * sizeof(int)); + })); + + ASSERT_ANY_THROW(auto NodeMemfill = Graph.add([&](sycl::handler &cgh) { + cgh.set_arg(0, DynObj); + cgh.fill(PtrB, 7, 16); + })); + + ASSERT_ANY_THROW(auto NodePrefetch = Graph.add([&](sycl::handler &cgh) { + cgh.set_arg(0, DynObj); + cgh.prefetch(PtrA, 16 * sizeof(int)); + })); + + ASSERT_ANY_THROW(auto NodeMemadvise = Graph.add([&](sycl::handler &cgh) { + cgh.set_arg(0, DynObj); + cgh.mem_advise(PtrA, 16 * sizeof(int), 1); + })); + + ASSERT_ANY_THROW(auto NodeHostTask = Graph.add([&](sycl::handler &cgh) { + cgh.set_arg(0, DynObj); + cgh.host_task([]() {}); + })); + + ASSERT_ANY_THROW(auto NodeBarreriTask = Graph.add([&](sycl::handler &cgh) { + cgh.set_arg(0, DynObj); + cgh.ext_oneapi_barrier(); + })); + + Graph.begin_recording(Queue); + ASSERT_ANY_THROW(auto NodeBarrierTask = Graph.add([&](sycl::handler &cgh) { + cgh.set_arg(0, DynObj); + cgh.ext_oneapi_barrier(); + })); + Graph.end_recording(Queue); + + auto NodeEmpty = Graph.add(); + + experimental::command_graph Subgraph(Queue.get_context(), + Queue.get_device()); + // Add an empty node to the subgraph + Subgraph.add(); + + auto SubgraphExec = Subgraph.finalize(); + ASSERT_ANY_THROW(auto NodeSubgraph = Graph.add([&](sycl::handler &cgh) { + cgh.set_arg(0, DynObj); + cgh.ext_oneapi_graph(SubgraphExec); + })); + }; experimental::dynamic_parameter DynamicParam{Graph, int{}}; + ASSERT_NO_FATAL_FAILURE(CheckNodeCompatibility(DynamicParam)); - ASSERT_NO_THROW(auto NodeKernel = Graph.add([&](sycl::handler &cgh) { - cgh.set_arg(0, DynamicParam); - cgh.single_task>([]() {}); - })); - - ASSERT_ANY_THROW(auto NodeMemcpy = Graph.add([&](sycl::handler &cgh) { - cgh.set_arg(0, DynamicParam); - cgh.memcpy(PtrA, PtrB, 16 * sizeof(int)); - })); - - ASSERT_ANY_THROW(auto NodeMemset = Graph.add([&](sycl::handler &cgh) { - cgh.set_arg(0, DynamicParam); - cgh.memset(PtrB, 7, 16 * sizeof(int)); - })); - - ASSERT_ANY_THROW(auto NodeMemfill = Graph.add([&](sycl::handler &cgh) { - cgh.set_arg(0, DynamicParam); - cgh.fill(PtrB, 7, 16); - })); - - ASSERT_ANY_THROW(auto NodePrefetch = Graph.add([&](sycl::handler &cgh) { - cgh.set_arg(0, DynamicParam); - cgh.prefetch(PtrA, 16 * sizeof(int)); - })); - - ASSERT_ANY_THROW(auto NodeMemadvise = Graph.add([&](sycl::handler &cgh) { - cgh.set_arg(0, DynamicParam); - cgh.mem_advise(PtrA, 16 * sizeof(int), 1); - })); - - ASSERT_ANY_THROW(auto NodeHostTask = Graph.add([&](sycl::handler &cgh) { - cgh.set_arg(0, DynamicParam); - cgh.host_task([]() {}); - })); - - ASSERT_ANY_THROW(auto NodeBarreriTask = Graph.add([&](sycl::handler &cgh) { - cgh.set_arg(0, DynamicParam); - cgh.ext_oneapi_barrier(); - })); - - Graph.begin_recording(Queue); - ASSERT_ANY_THROW(auto NodeBarrierTask = Graph.add([&](sycl::handler &cgh) { - cgh.set_arg(0, DynamicParam); - cgh.ext_oneapi_barrier(); - })); - Graph.end_recording(Queue); - - auto NodeEmpty = Graph.add(); - - experimental::command_graph Subgraph(Queue.get_context(), Dev); - // Add an empty node to the subgraph - Subgraph.add(); + experimental::dynamic_work_group_memory DynamicWorkGroupMem{Graph, 1}; + ASSERT_NO_FATAL_FAILURE(CheckNodeCompatibility(DynamicWorkGroupMem)); - auto SubgraphExec = Subgraph.finalize(); - ASSERT_ANY_THROW(auto NodeSubgraph = Graph.add([&](sycl::handler &cgh) { - cgh.set_arg(0, DynamicParam); - cgh.ext_oneapi_graph(SubgraphExec); - })); + experimental::dynamic_local_accessor DynamicLocalAcc{Graph, 1}; + ASSERT_NO_FATAL_FAILURE(CheckNodeCompatibility(DynamicLocalAcc)); } TEST_F(CommandGraphTest, UpdateRangeErrors) {