From 00d1cbe2d02819ad50b9d74d899211babacc0e82 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Fri, 24 Feb 2023 08:07:58 -0800 Subject: [PATCH 1/5] Added rough P2P usage examples. Signed-off-by: JackAKirk --- SYCL/P2P/p2p_usm_access.cpp | 84 +++++++++++++++++++++++++++++++++++++ SYCL/P2P/p2p_usm_memcpy.cpp | 57 +++++++++++++++++++++++++ 2 files changed, 141 insertions(+) create mode 100644 SYCL/P2P/p2p_usm_access.cpp create mode 100644 SYCL/P2P/p2p_usm_memcpy.cpp diff --git a/SYCL/P2P/p2p_usm_access.cpp b/SYCL/P2P/p2p_usm_access.cpp new file mode 100644 index 0000000000..2d763a1e9b --- /dev/null +++ b/SYCL/P2P/p2p_usm_access.cpp @@ -0,0 +1,84 @@ +#include +#include +#include + +using namespace sycl; + +int main() { + int Data[10] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1}; + { + + std::vector Devs; + + // Note that this code is temporary due to the temporary lack of multiple devices per sycl context in the nvidia backend. + //////////////////////// + for (const auto &plt : sycl::platform::get_platforms()) { + + if (plt.get_backend() == sycl::backend::cuda) + Devs.push_back(plt.get_devices()[0]); + } + //////////////////////// + + std::vector Queues; + std::transform(Devs.begin(), Devs.end(), std::back_inserter(Queues), + [](const sycl::device &D) { return sycl::queue{D}; }); + + assert(Queues.size() > 1); + + int N = 100; + int val = 5; + int *input = (int *)malloc(sizeof(int) * N); + for (int i = 0; i < N; i++) { + input[i] = val; + } + + // note: practically it could also be good to provide clear directions to + // documentation showing users how to make sure they are constructing queues + // using distinct devices. + + auto Dev0 = Queues[0].get_device(); + auto Dev1 = Queues[1].get_device(); + + assert(Dev0 != Dev1); + + int *arr0 = malloc(N, Queues[0], usm::alloc::device); + int *arr1 = malloc(N, Queues[1], usm::alloc::device); + + // note: in real use would obviously load/set arr0/arr1 with meaningful + // data. + + if (Dev0.ext_oneapi_can_access_peer( + Dev1, sycl::ext::oneapi::peer_access::access_supported)) { + // dev0 enables itself to access dev1 + Dev0.ext_oneapi_enable_peer_access(Dev1); + // dev1 enables itself to access dev0 + Dev1.ext_oneapi_enable_peer_access(Dev0); + // Dev0.ext_oneapi_disable_peer_access(Dev1); + } + + // access Device/Queue 1 + Queues[0].submit([&](handler &cgh) { + auto myRange = range<1>(N); + auto myKernel = ([=](id<1> idx) { + arr0[idx] = idx[0]; + arr1[idx] = idx[0]; + }); + + cgh.parallel_for(myRange, myKernel); + }).wait(); + + sycl::free(arr0, Queues[0]); + sycl::free(arr1, Queues[1]); + + if (Dev0.ext_oneapi_can_access_peer( + Dev1, sycl::ext::oneapi::peer_access::access_supported)) { + Dev0.ext_oneapi_disable_peer_access(Dev1); + Dev1.ext_oneapi_disable_peer_access(Dev0); + } + } + + //TODO we should also test inter-device atomics + // Devs[0].ext_oneapi_can_access_peer(Devs[1], sycl::ext::oneapi::peer_access::atomics_supported); + + return 0; +} diff --git a/SYCL/P2P/p2p_usm_memcpy.cpp b/SYCL/P2P/p2p_usm_memcpy.cpp new file mode 100644 index 0000000000..538579ddea --- /dev/null +++ b/SYCL/P2P/p2p_usm_memcpy.cpp @@ -0,0 +1,57 @@ +#include +#include +#include + +using namespace sycl; + +int main() { + int Data[10] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1}; + { + + std::vector Devs; + + // Note that this code is temporary due to the temporary lack of multiple devices per sycl context in the nvidia backend. + //////////////////////// + for (const auto &plt : sycl::platform::get_platforms()) { + + if (plt.get_backend() == sycl::backend::cuda) + Devs.push_back(plt.get_devices()[0]); + } + //////////////////////// + + ///// Enable bi-directional peer copies + Devs[0].ext_oneapi_enable_peer_access(Devs[1]); + + std::vector Queues; + std::transform(Devs.begin(), Devs.end(), std::back_inserter(Queues), + [](const sycl::device &D) { return sycl::queue{D}; }); + + assert(Queues.size() > 1); + + int N = 100; + int val = 5; + int *input = (int *)malloc(sizeof(int) * N); + for (int i = 0; i < N; i++) { + input[i] = val; + } + + int *arr0 = malloc(N, Queues[0], usm::alloc::device); + Queues[0].memcpy(arr0, input, N * sizeof(int)); + Queues[0].wait(); + + int *arr2 = malloc(N, Queues[1], usm::alloc::device); + // Copy device usm allocated in devices/cuContexts + Queues[0].copy(arr1, arr0, N).wait(); + int *out; + out = new int[N]; + Queues[0].memcpy(out, arr1, N * sizeof(int)).wait(); + + sycl::free(arr0, Queues[0]); + sycl::free(arr1, Queues[1]); + + std::cout << out[0]; + delete[] out; + } + + return 0; +} From 45a7859e628404b47d52b5626f75ff44e9580afe Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Tue, 28 Feb 2023 16:32:40 +0000 Subject: [PATCH 2/5] correct memcpy Signed-off-by: JackAKirk --- SYCL/P2P/p2p_usm_memcpy.cpp | 83 +++++++++++++++++++------------------ 1 file changed, 43 insertions(+), 40 deletions(-) diff --git a/SYCL/P2P/p2p_usm_memcpy.cpp b/SYCL/P2P/p2p_usm_memcpy.cpp index 538579ddea..98bcabac94 100644 --- a/SYCL/P2P/p2p_usm_memcpy.cpp +++ b/SYCL/P2P/p2p_usm_memcpy.cpp @@ -5,53 +5,56 @@ using namespace sycl; int main() { - int Data[10] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1}; - { - std::vector Devs; - - // Note that this code is temporary due to the temporary lack of multiple devices per sycl context in the nvidia backend. - //////////////////////// - for (const auto &plt : sycl::platform::get_platforms()) { + std::vector Devs; - if (plt.get_backend() == sycl::backend::cuda) - Devs.push_back(plt.get_devices()[0]); - } - //////////////////////// - - ///// Enable bi-directional peer copies - Devs[0].ext_oneapi_enable_peer_access(Devs[1]); - - std::vector Queues; - std::transform(Devs.begin(), Devs.end(), std::back_inserter(Queues), - [](const sycl::device &D) { return sycl::queue{D}; }); - - assert(Queues.size() > 1); - - int N = 100; - int val = 5; - int *input = (int *)malloc(sizeof(int) * N); - for (int i = 0; i < N; i++) { - input[i] = val; - } + // Note that this code is temporary due to the temporary lack of multiple devices per sycl context in the nvidia backend. + //////////////////////// + for (const auto &plt : sycl::platform::get_platforms()) { + + if (plt.get_backend() == sycl::backend::cuda) + Devs.push_back(plt.get_devices()[0]); + } + //////////////////////// + + ///// Enable bi-directional peer copies + Devs[0].ext_oneapi_enable_peer_access(Devs[1]); + + std::vector Queues; + std::transform(Devs.begin(), Devs.end(), std::back_inserter(Queues), + [](const sycl::device &D) { return sycl::queue{D}; }); - int *arr0 = malloc(N, Queues[0], usm::alloc::device); - Queues[0].memcpy(arr0, input, N * sizeof(int)); - Queues[0].wait(); + assert(Queues.size() > 1); - int *arr2 = malloc(N, Queues[1], usm::alloc::device); - // Copy device usm allocated in devices/cuContexts - Queues[0].copy(arr1, arr0, N).wait(); - int *out; - out = new int[N]; - Queues[0].memcpy(out, arr1, N * sizeof(int)).wait(); + int N = 100; + int *input = (int *)malloc(sizeof(int) * N); + for (int i = 0; i < N; i++) { + input[i] = i; + } + + int *arr0 = malloc(N, Queues[0], usm::alloc::device); + Queues[0].memcpy(arr0, input, N * sizeof(int)).wait(); + + int *arr1 = malloc(N, Queues[1], usm::alloc::device); + Queues[1].copy(arr0, arr1, N).wait(); - sycl::free(arr0, Queues[0]); - sycl::free(arr1, Queues[1]); + int *out; + out = new int[N]; + Queues[1].copy(arr1, out, N).wait(); - std::cout << out[0]; - delete[] out; + sycl::free(arr0, Queues[0]); + sycl::free(arr1, Queues[1]); + + bool ok = true; + for (int i = 0; i < N; i++) { + if (out[i] != input[i]) { + printf("%d %d\n", out[i], input[i]); + ok = false; //break; + } } + delete[] out; + + printf("%s\n", ok ? "PASS" : "FAIL"); return 0; } From 78bee4e7a508d3917292c5f5fd3ea8cf923e65e1 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Fri, 3 Mar 2023 11:32:31 -0800 Subject: [PATCH 3/5] Updated and added atomics test. Signed-off-by: JackAKirk --- SYCL/P2P/p2p_usm_access.cpp | 84 ---------------------------------- SYCL/P2P/p2p_usm_memcpy.cpp | 60 ------------------------- SYCL/USM/P2P/p2p_access.cpp | 71 +++++++++++++++++++++++++++++ SYCL/USM/P2P/p2p_atomics.cpp | 87 ++++++++++++++++++++++++++++++++++++ SYCL/USM/P2P/p2p_memcpy.cpp | 78 ++++++++++++++++++++++++++++++++ 5 files changed, 236 insertions(+), 144 deletions(-) delete mode 100644 SYCL/P2P/p2p_usm_access.cpp delete mode 100644 SYCL/P2P/p2p_usm_memcpy.cpp create mode 100644 SYCL/USM/P2P/p2p_access.cpp create mode 100644 SYCL/USM/P2P/p2p_atomics.cpp create mode 100644 SYCL/USM/P2P/p2p_memcpy.cpp diff --git a/SYCL/P2P/p2p_usm_access.cpp b/SYCL/P2P/p2p_usm_access.cpp deleted file mode 100644 index 2d763a1e9b..0000000000 --- a/SYCL/P2P/p2p_usm_access.cpp +++ /dev/null @@ -1,84 +0,0 @@ -#include -#include -#include - -using namespace sycl; - -int main() { - int Data[10] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1}; - { - - std::vector Devs; - - // Note that this code is temporary due to the temporary lack of multiple devices per sycl context in the nvidia backend. - //////////////////////// - for (const auto &plt : sycl::platform::get_platforms()) { - - if (plt.get_backend() == sycl::backend::cuda) - Devs.push_back(plt.get_devices()[0]); - } - //////////////////////// - - std::vector Queues; - std::transform(Devs.begin(), Devs.end(), std::back_inserter(Queues), - [](const sycl::device &D) { return sycl::queue{D}; }); - - assert(Queues.size() > 1); - - int N = 100; - int val = 5; - int *input = (int *)malloc(sizeof(int) * N); - for (int i = 0; i < N; i++) { - input[i] = val; - } - - // note: practically it could also be good to provide clear directions to - // documentation showing users how to make sure they are constructing queues - // using distinct devices. - - auto Dev0 = Queues[0].get_device(); - auto Dev1 = Queues[1].get_device(); - - assert(Dev0 != Dev1); - - int *arr0 = malloc(N, Queues[0], usm::alloc::device); - int *arr1 = malloc(N, Queues[1], usm::alloc::device); - - // note: in real use would obviously load/set arr0/arr1 with meaningful - // data. - - if (Dev0.ext_oneapi_can_access_peer( - Dev1, sycl::ext::oneapi::peer_access::access_supported)) { - // dev0 enables itself to access dev1 - Dev0.ext_oneapi_enable_peer_access(Dev1); - // dev1 enables itself to access dev0 - Dev1.ext_oneapi_enable_peer_access(Dev0); - // Dev0.ext_oneapi_disable_peer_access(Dev1); - } - - // access Device/Queue 1 - Queues[0].submit([&](handler &cgh) { - auto myRange = range<1>(N); - auto myKernel = ([=](id<1> idx) { - arr0[idx] = idx[0]; - arr1[idx] = idx[0]; - }); - - cgh.parallel_for(myRange, myKernel); - }).wait(); - - sycl::free(arr0, Queues[0]); - sycl::free(arr1, Queues[1]); - - if (Dev0.ext_oneapi_can_access_peer( - Dev1, sycl::ext::oneapi::peer_access::access_supported)) { - Dev0.ext_oneapi_disable_peer_access(Dev1); - Dev1.ext_oneapi_disable_peer_access(Dev0); - } - } - - //TODO we should also test inter-device atomics - // Devs[0].ext_oneapi_can_access_peer(Devs[1], sycl::ext::oneapi::peer_access::atomics_supported); - - return 0; -} diff --git a/SYCL/P2P/p2p_usm_memcpy.cpp b/SYCL/P2P/p2p_usm_memcpy.cpp deleted file mode 100644 index 98bcabac94..0000000000 --- a/SYCL/P2P/p2p_usm_memcpy.cpp +++ /dev/null @@ -1,60 +0,0 @@ -#include -#include -#include - -using namespace sycl; - -int main() { - - std::vector Devs; - - // Note that this code is temporary due to the temporary lack of multiple devices per sycl context in the nvidia backend. - //////////////////////// - for (const auto &plt : sycl::platform::get_platforms()) { - - if (plt.get_backend() == sycl::backend::cuda) - Devs.push_back(plt.get_devices()[0]); - } - //////////////////////// - - ///// Enable bi-directional peer copies - Devs[0].ext_oneapi_enable_peer_access(Devs[1]); - - std::vector Queues; - std::transform(Devs.begin(), Devs.end(), std::back_inserter(Queues), - [](const sycl::device &D) { return sycl::queue{D}; }); - - assert(Queues.size() > 1); - - int N = 100; - int *input = (int *)malloc(sizeof(int) * N); - for (int i = 0; i < N; i++) { - input[i] = i; - } - - int *arr0 = malloc(N, Queues[0], usm::alloc::device); - Queues[0].memcpy(arr0, input, N * sizeof(int)).wait(); - - int *arr1 = malloc(N, Queues[1], usm::alloc::device); - Queues[1].copy(arr0, arr1, N).wait(); - - int *out; - out = new int[N]; - Queues[1].copy(arr1, out, N).wait(); - - sycl::free(arr0, Queues[0]); - sycl::free(arr1, Queues[1]); - - bool ok = true; - for (int i = 0; i < N; i++) { - if (out[i] != input[i]) { - printf("%d %d\n", out[i], input[i]); - ok = false; //break; - } - } - delete[] out; - - printf("%s\n", ok ? "PASS" : "FAIL"); - - return 0; -} diff --git a/SYCL/USM/P2P/p2p_access.cpp b/SYCL/USM/P2P/p2p_access.cpp new file mode 100644 index 0000000000..6e04f390d7 --- /dev/null +++ b/SYCL/USM/P2P/p2p_access.cpp @@ -0,0 +1,71 @@ +// REQUIRES: cuda +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %t.out + +#include +#include + +using namespace sycl; + +int main() { + + // Note that this code will largely be removed: it is temporary due to the + // temporary lack of multiple devices per sycl context in the Nvidia backend. + // A portable implementation, using a single gpu platform, should be possible + // once the Nvidia context issues are resolved. + //////////////////////////////////////////////////////////////////////// + std::vector Devs; + for (const auto &plt : sycl::platform::get_platforms()) { + + if (plt.get_backend() == sycl::backend::ext_oneapi_cuda) + Devs.push_back(plt.get_devices()[0]); + } + if (Devs.size() < 2) { + std::cout << "Cannot test P2P capabilities, at least two devices are " + "required, exiting." + << std::endl; + return 0; + } + + std::vector Queues; + std::transform(Devs.begin(), Devs.end(), std::back_inserter(Queues), + [](const sycl::device &D) { return sycl::queue{D}; }); + //////////////////////////////////////////////////////////////////////// + + if (!Devs[0].ext_oneapi_can_access_peer( + Devs[1], sycl::ext::oneapi::peer_access::access_supported)) { + std::cout << "P2P access is not supported by devices, exiting." + << std::endl; + return 0; + } + + // Enables Devs[0] to access Devs[1] memory. + Devs[0].ext_oneapi_enable_peer_access(Devs[1]); + + auto *arr1 = malloc(2, Queues[1], usm::alloc::device); + + // Calling fill on Devs[1] data with Devs[0] queue requires P2P enabled. + Queues[0].fill(arr1, 2, 2).wait(); + + // Access/write Devs[1] data with Devs[0] queue. + Queues[0] + .submit([&](handler &cgh) { + auto myRange = range<1>(1); + auto myKernel = ([=](id<1> idx) { arr1[0] *= 2; }); + + cgh.parallel_for(myRange, myKernel); + }) + .wait(); + + int2 out; + + Queues[0].memcpy(&out, arr1, 2 * sizeof(int)).wait(); + assert(out[0] == 4); + assert(out[1] == 2); + + sycl::free(arr1, Queues[1]); + + Devs[0].ext_oneapi_disable_peer_access(Devs[1]); + + return 0; +} diff --git a/SYCL/USM/P2P/p2p_atomics.cpp b/SYCL/USM/P2P/p2p_atomics.cpp new file mode 100644 index 0000000000..457221da45 --- /dev/null +++ b/SYCL/USM/P2P/p2p_atomics.cpp @@ -0,0 +1,87 @@ +// REQUIRES: cuda +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %t.out + +#include +#include +#include +#include + +using namespace sycl; + +// number of atomic operations +constexpr size_t N = 512; + +int main() { + + // Note that this code will largely be removed: it is temporary due to the + // temporary lack of multiple devices per sycl context in the Nvidia backend. + // A portable implementation, using a single gpu platform, should be possible + // once the Nvidia context issues are resolved. + //////////////////////////////////////////////////////////////////////// + std::vector Devs; + for (const auto &plt : sycl::platform::get_platforms()) { + + if (plt.get_backend() == sycl::backend::ext_oneapi_cuda) + Devs.push_back(plt.get_devices()[0]); + } + if (Devs.size() < 2) { + std::cout << "Cannot test P2P capabilities, at least two devices are " + "required, exiting." + << std::endl; + return 0; + } + + std::vector Queues; + std::transform(Devs.begin(), Devs.end(), std::back_inserter(Queues), + [](const sycl::device &D) { return sycl::queue{D}; }); + //////////////////////////////////////////////////////////////////////// + + auto Dev0 = Queues[0].get_device(); + auto Dev1 = Queues[1].get_device(); + + assert(Dev0 != Dev1); + + if (!Devs[0].ext_oneapi_can_access_peer( + Devs[1], sycl::ext::oneapi::peer_access::atomics_supported)) { + std::cout << "P2P atomics are not supported by devices, exiting." + << std::endl; + return 0; + } + + // Enables Dev1 to access Dev0 memory. + Dev1.ext_oneapi_enable_peer_access(Dev0); + + std::vector input(N); + std::iota(input.begin(), input.end(), 0); + + double h_sum = 0.; + for (const auto &value : input) { + h_sum += value; + } + + double *d_sum = malloc_shared(1, Queues[0]); + double *d_in = malloc_device(N, Queues[0]); + + Queues[0].memcpy(d_in, &input[0], N * sizeof(double)); + Queues[0].wait(); + + range global_range{N}; + + *d_sum = 0.; + Queues[1].submit([&](handler &h) { + h.parallel_for(global_range, [=](id<1> i) { + sycl::atomic_ref(*d_sum) += d_in[i]; + }); + }); + Queues[1].wait(); + + assert(*d_sum == h_sum); + + free(d_sum, Queues[0]); + free(d_in, Queues[0]); + + return 0; +} diff --git a/SYCL/USM/P2P/p2p_memcpy.cpp b/SYCL/USM/P2P/p2p_memcpy.cpp new file mode 100644 index 0000000000..09d62021ed --- /dev/null +++ b/SYCL/USM/P2P/p2p_memcpy.cpp @@ -0,0 +1,78 @@ +// REQUIRES: cuda +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %t.out + +#include +#include +#include +#include + +using namespace sycl; + +// Array size to copy +constexpr int N = 100; + +int main() { + + // Note that this code will largely be removed: it is temporary due to the + // temporary lack of multiple devices per sycl context in the Nvidia backend. + // A portable implementation, using a single gpu platform, should be possible + // once the Nvidia context issues are resolved. + //////////////////////////////////////////////////////////////////////// + std::vector Devs; + for (const auto &plt : sycl::platform::get_platforms()) { + + if (plt.get_backend() == sycl::backend::ext_oneapi_cuda) + Devs.push_back(plt.get_devices()[0]); + } + if (Devs.size() < 2) { + std::cout << "Cannot test P2P capabilities, at least two devices are " + "required, exiting." + << std::endl; + return 0; + } + + std::vector Queues; + std::transform(Devs.begin(), Devs.end(), std::back_inserter(Queues), + [](const sycl::device &D) { return sycl::queue{D}; }); + //////////////////////////////////////////////////////////////////////// + + if (!Devs[0].ext_oneapi_can_access_peer( + Devs[1], sycl::ext::oneapi::peer_access::access_supported)) { + std::cout << "P2P access is not supported by devices, exiting." + << std::endl; + return 0; + } + + // Enables Devs[0] to access Devs[1] memory. + Devs[0].ext_oneapi_enable_peer_access(Devs[1]); + + std::vector input(N); + std::iota(input.begin(), input.end(), 0); + + int *arr0 = malloc(N, Queues[0], usm::alloc::device); + Queues[0].memcpy(arr0, &input[0], N * sizeof(int)); + + int *arr1 = malloc(N, Queues[1], usm::alloc::device); + // P2P copy performed here: + Queues[1].copy(arr0, arr1, N).wait(); + + int out[N]; + Queues[1].copy(arr1, out, N).wait(); + + sycl::free(arr0, Queues[0]); + sycl::free(arr1, Queues[1]); + + bool ok = true; + for (int i = 0; i < N; i++) { + if (out[i] != input[i]) { + printf("%d %d\n", out[i], input[i]); + ok = false; + break; + } + } + + printf("%s\n", ok ? "PASS" : "FAIL"); + + return 0; +} From b2fe38e7b999962833243c620e7632edd63361eb Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Mon, 6 Mar 2023 08:14:58 -0800 Subject: [PATCH 4/5] Chose better test name. Signed-off-by: JackAKirk --- SYCL/USM/P2P/p2p_atomics.cpp | 5 ----- SYCL/USM/P2P/{p2p_memcpy.cpp => p2p_copy.cpp} | 0 2 files changed, 5 deletions(-) rename SYCL/USM/P2P/{p2p_memcpy.cpp => p2p_copy.cpp} (100%) diff --git a/SYCL/USM/P2P/p2p_atomics.cpp b/SYCL/USM/P2P/p2p_atomics.cpp index 457221da45..ff38d45790 100644 --- a/SYCL/USM/P2P/p2p_atomics.cpp +++ b/SYCL/USM/P2P/p2p_atomics.cpp @@ -37,11 +37,6 @@ int main() { [](const sycl::device &D) { return sycl::queue{D}; }); //////////////////////////////////////////////////////////////////////// - auto Dev0 = Queues[0].get_device(); - auto Dev1 = Queues[1].get_device(); - - assert(Dev0 != Dev1); - if (!Devs[0].ext_oneapi_can_access_peer( Devs[1], sycl::ext::oneapi::peer_access::atomics_supported)) { std::cout << "P2P atomics are not supported by devices, exiting." diff --git a/SYCL/USM/P2P/p2p_memcpy.cpp b/SYCL/USM/P2P/p2p_copy.cpp similarity index 100% rename from SYCL/USM/P2P/p2p_memcpy.cpp rename to SYCL/USM/P2P/p2p_copy.cpp From dfcd93adc90520e8741f0c148a9d10477aac8f65 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Fri, 10 Mar 2023 08:42:36 -0800 Subject: [PATCH 5/5] Added PI_TRACE tests for new pi peer calls. Signed-off-by: JackAKirk --- SYCL/USM/P2P/p2p_access.cpp | 6 +++++- SYCL/USM/P2P/p2p_atomics.cpp | 15 +++++++++------ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/SYCL/USM/P2P/p2p_access.cpp b/SYCL/USM/P2P/p2p_access.cpp index 6e04f390d7..22e351aec3 100644 --- a/SYCL/USM/P2P/p2p_access.cpp +++ b/SYCL/USM/P2P/p2p_access.cpp @@ -1,6 +1,6 @@ // REQUIRES: cuda // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -// RUN: %t.out +// RUN: env SYCL_PI_TRACE=2 %GPU_RUN_PLACEHOLDER %t.out 2>&1 %GPU_CHECK_PLACEHOLDER #include #include @@ -69,3 +69,7 @@ int main() { return 0; } + +// CHECK: ---> piextPeerAccessGetInfo( +// CHECK: ---> piextEnablePeerAccess( +// CHECK: ---> piextDisablePeerAccess( diff --git a/SYCL/USM/P2P/p2p_atomics.cpp b/SYCL/USM/P2P/p2p_atomics.cpp index ff38d45790..74651946fb 100644 --- a/SYCL/USM/P2P/p2p_atomics.cpp +++ b/SYCL/USM/P2P/p2p_atomics.cpp @@ -1,6 +1,6 @@ // REQUIRES: cuda -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -// RUN: %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -Xsycl-target-backend --cuda-gpu-arch=sm_61 %s -o %t.out +// RUN: env SYCL_PI_TRACE=2 %GPU_RUN_PLACEHOLDER %t.out 2>&1 %GPU_CHECK_PLACEHOLDER #include #include @@ -37,15 +37,15 @@ int main() { [](const sycl::device &D) { return sycl::queue{D}; }); //////////////////////////////////////////////////////////////////////// - if (!Devs[0].ext_oneapi_can_access_peer( - Devs[1], sycl::ext::oneapi::peer_access::atomics_supported)) { + if (!Devs[1].ext_oneapi_can_access_peer( + Devs[0], sycl::ext::oneapi::peer_access::atomics_supported)) { std::cout << "P2P atomics are not supported by devices, exiting." << std::endl; return 0; } - // Enables Dev1 to access Dev0 memory. - Dev1.ext_oneapi_enable_peer_access(Dev0); + // Enables Devs[1] to access Devs[0] memory. + Devs[1].ext_oneapi_enable_peer_access(Devs[0]); std::vector input(N); std::iota(input.begin(), input.end(), 0); @@ -80,3 +80,6 @@ int main() { return 0; } + +// CHECK: ---> piextPeerAccessGetInfo( +// CHECK: ---> piextEnablePeerAccess(