From 8e52381e7d14ca8fad73843299cb6b7355064b73 Mon Sep 17 00:00:00 2001 From: "Lamzed-Short, Andrew" Date: Thu, 2 Mar 2023 08:49:58 -0800 Subject: [PATCH 1/4] Test OpenCL backend for atomic_memory_order_capabilities The context and device queries should now both report at least memory_order::relaxed on the implementation side, so this test should pass when the target device uses an OpenCL 3.0 or newer backend. --- SYCL/AtomicRef/atomic_memory_order.cpp | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/SYCL/AtomicRef/atomic_memory_order.cpp b/SYCL/AtomicRef/atomic_memory_order.cpp index e06e7c00f7..cd631e8904 100644 --- a/SYCL/AtomicRef/atomic_memory_order.cpp +++ b/SYCL/AtomicRef/atomic_memory_order.cpp @@ -2,9 +2,9 @@ // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out -// L0, OpenCL backends don't currently support +// L0, OpenCL (<3.0) backends don't currently support // info::device::atomic_memory_order_capabilities -// UNSUPPORTED: level_zero || opencl +// UNSUPPORTED: level_zero // NOTE: General tests for atomic memory order capabilities. @@ -16,12 +16,22 @@ using namespace sycl; int main() { queue q; - std::vector supported_memory_orders = + // Context + std::vector supported_context_memory_orders = + q.get_context() + .get_info(); + + // Relaxed memory order must be supported. This ordering is used in other + // tests. + assert(is_supported(supported_context_memory_orders, memory_order::relaxed)); + + // Device + std::vector supported_device_memory_orders = q.get_device().get_info(); // Relaxed memory order must be supported. This ordering is used in other // tests. - assert(is_supported(supported_memory_orders, memory_order::relaxed)); + assert(is_supported(supported_device_memory_orders, memory_order::relaxed)); std::cout << "Test passed." 
<< std::endl; } From 2eced82592200d5b4ca302e59ac204572810868b Mon Sep 17 00:00:00 2001 From: "Lamzed-Short, Andrew" Date: Fri, 17 Mar 2023 12:57:30 -0700 Subject: [PATCH 2/4] Add level_zero and opencl backends to atomic_memory_order* tests The work-group sizes in the acq_rel test still need to be reduced so that the test can run on all backends and devices. --- SYCL/AtomicRef/atomic_memory_order.cpp | 11 +++-------- SYCL/AtomicRef/atomic_memory_order_acq_rel.cpp | 5 +---- SYCL/AtomicRef/atomic_memory_order_seq_cst.cpp | 5 +---- 3 files changed, 5 insertions(+), 16 deletions(-) diff --git a/SYCL/AtomicRef/atomic_memory_order.cpp b/SYCL/AtomicRef/atomic_memory_order.cpp index cd631e8904..532110b524 100644 --- a/SYCL/AtomicRef/atomic_memory_order.cpp +++ b/SYCL/AtomicRef/atomic_memory_order.cpp @@ -2,11 +2,10 @@ // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out -// L0, OpenCL (<3.0) backends don't currently support -// info::device::atomic_memory_order_capabilities -// UNSUPPORTED: level_zero -// NOTE: General tests for atomic memory order capabilities. +// This test checks whether the minimum required memory order capabilities are +// supported in both context and device queries. Specifically the "relaxed" +// memory order capability, which is used in other tests. #include "atomic_memory_order.h" #include @@ -21,16 +20,12 @@ int main() { q.get_context() .get_info(); - // Relaxed memory order must be supported. This ordering is used in other - // tests. assert(is_supported(supported_context_memory_orders, memory_order::relaxed)); // Device std::vector supported_device_memory_orders = q.get_device().get_info(); - // Relaxed memory order must be supported. This ordering is used in other - // tests. assert(is_supported(supported_device_memory_orders, memory_order::relaxed)); std::cout << "Test passed." 
<< std::endl; diff --git a/SYCL/AtomicRef/atomic_memory_order_acq_rel.cpp b/SYCL/AtomicRef/atomic_memory_order_acq_rel.cpp index 5e62299583..306d2f1c56 100644 --- a/SYCL/AtomicRef/atomic_memory_order_acq_rel.cpp +++ b/SYCL/AtomicRef/atomic_memory_order_acq_rel.cpp @@ -1,10 +1,7 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -O3 -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -O3 -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out -// L0, OpenCL, and HIP backends don't currently support -// info::device::atomic_memory_order_capabilities -// UNSUPPORTED: level_zero, opencl // NOTE: Tests fetch_add for acquire and release memory ordering. diff --git a/SYCL/AtomicRef/atomic_memory_order_seq_cst.cpp b/SYCL/AtomicRef/atomic_memory_order_seq_cst.cpp index 72e521aabe..8dcc35796a 100755 --- a/SYCL/AtomicRef/atomic_memory_order_seq_cst.cpp +++ b/SYCL/AtomicRef/atomic_memory_order_seq_cst.cpp @@ -1,10 +1,7 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -O3 -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -O3 -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out -// L0, OpenCL, and HIP backends don't currently support -// info::device::atomic_memory_order_capabilities -// UNSUPPORTED: level_zero, opencl #include "atomic_memory_order.h" #include From 3457ecf0b0b9f2bf442c03b66739e007ed318502 Mon Sep 17 00:00:00 2001 From: "Lamzed-Short, Andrew" Date: Tue, 21 Mar 2023 04:20:48 -0700 Subject: [PATCH 3/4] Re-added cuda-specific test details --- SYCL/AtomicRef/atomic_memory_order_acq_rel.cpp | 2 +- SYCL/AtomicRef/atomic_memory_order_seq_cst.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/SYCL/AtomicRef/atomic_memory_order_acq_rel.cpp 
b/SYCL/AtomicRef/atomic_memory_order_acq_rel.cpp index 306d2f1c56..719a6998f5 100644 --- a/SYCL/AtomicRef/atomic_memory_order_acq_rel.cpp +++ b/SYCL/AtomicRef/atomic_memory_order_acq_rel.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -O3 -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -O3 -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out diff --git a/SYCL/AtomicRef/atomic_memory_order_seq_cst.cpp b/SYCL/AtomicRef/atomic_memory_order_seq_cst.cpp index 8dcc35796a..1c207c5776 100755 --- a/SYCL/AtomicRef/atomic_memory_order_seq_cst.cpp +++ b/SYCL/AtomicRef/atomic_memory_order_seq_cst.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -O3 -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -O3 -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out From 3dee62a3dc3ee98c7c212688b515036e28b5e10e Mon Sep 17 00:00:00 2001 From: "Lamzed-Short, Andrew" Date: Fri, 24 Mar 2023 10:17:48 -0700 Subject: [PATCH 4/4] Reduced work group size for acq_rel test The test passes on CPU but fails on GPU, which suggests a problem in the GPU driver implementation, possibly specific to fetch_add. 
--- SYCL/AtomicRef/atomic_memory_order_acq_rel.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/SYCL/AtomicRef/atomic_memory_order_acq_rel.cpp b/SYCL/AtomicRef/atomic_memory_order_acq_rel.cpp index 719a6998f5..9e5180584d 100644 --- a/SYCL/AtomicRef/atomic_memory_order_acq_rel.cpp +++ b/SYCL/AtomicRef/atomic_memory_order_acq_rel.cpp @@ -11,7 +11,7 @@ using namespace sycl; template void test_acquire_global() { - const size_t N_items = 1024; + const size_t N_items = 256; const size_t N_iters = 1000; int error = 0; @@ -53,7 +53,7 @@ template void test_acquire_global() { } template void test_acquire_local() { - const size_t local_size = 1024; + const size_t local_size = 256; const size_t N_wgs = 16; const size_t global_size = local_size * N_wgs; const size_t N_iters = 1000; @@ -102,7 +102,7 @@ template void test_acquire_local() { } template void test_release_global() { - const size_t N_items = 1024; + const size_t N_items = 256; const size_t N_iters = 1000; int error = 0; @@ -144,7 +144,7 @@ template void test_release_global() { } template void test_release_local() { - const size_t local_size = 1024; + const size_t local_size = 256; const size_t N_wgs = 16; const size_t global_size = local_size * N_wgs; const size_t N_iters = 1000;