@@ -194,6 +194,7 @@ class run_atomic_fence {
                          " and test_type = " + test_type_name)
                          .create()) {
    auto queue = once_per_unit::get_queue();
+   // Early return for unsupported memory order and memory scope.
    if (!check_memory_order_scope_capabilities(queue, MemoryOrder,
                                               MemoryScope, memory_order_name,
                                               memory_scope_name)) {
@@ -226,11 +227,32 @@ class run_atomic_fence {
    sycl::buffer<bool> res_buf(&res, sycl::range<1>(1));
    sycl::buffer<int> sync_buffer(&sync, sycl::range<1>(1));
    sycl::buffer<int> data_buffer(&data, sycl::range<1>(1));
+   // Using the const host variable MemoryScope directly in the kernel may
+   // cause a compilation failure for AOT builds, so MemoryScope is passed to
+   // the device code through a buffer; the device code never runs if the host
+   // code returned early above because the memory capabilities are unsupported.
+   sycl::buffer<sycl::memory_scope> memory_scope_buffer(&MemoryScope,
+                                                        sycl::range<1>(1));
+   // Transfer order_write to device code.
+   sycl::buffer<sycl::memory_order> memory_order_write_buffer(
+       &order_write, sycl::range<1>(1));
+   // Transfer order_read to device code.
+   sycl::buffer<sycl::memory_order> memory_order_read_buffer(
+       &order_read, sycl::range<1>(1));
    queue.submit([&](sycl::handler& cgh) {
      auto res_acc =
          res_buf.template get_access<sycl::access_mode::write>(cgh);
      auto sync_flag_acc = get_accessor(cgh, sync_buffer);
      auto data_acc = get_accessor(cgh, data_buffer);
+     auto memory_scope_acc =
+         memory_scope_buffer.template get_access<sycl::access_mode::read>(
+             cgh);
+     auto memory_order_write_acc =
+         memory_order_write_buffer
+             .template get_access<sycl::access_mode::read>(cgh);
+     auto memory_order_read_acc =
+         memory_order_read_buffer
+             .template get_access<sycl::access_mode::read>(cgh);
      cgh.parallel_for(sycl::nd_range<1>(global_range, local_range),
                       [=](sycl::nd_item<1> nditem) {
                         auto g = nditem.get_group();
@@ -246,7 +268,8 @@ class run_atomic_fence {
                           *data = value;
                           // Used atomic_fence to guarantee the order
                           // instructions execution
-                          sycl::atomic_fence(order_write, MemoryScope);
+                          sycl::atomic_fence(memory_order_write_acc[0],
+                                             memory_scope_acc[0]);
                           // Used atomic sync flag to avoid data raicing
                           sync_flag = 1;
                         } else {
@@ -257,7 +280,8 @@ class run_atomic_fence {
                             break;
                           }
                         }
-                        sycl::atomic_fence(order_read, MemoryScope);
+                        sycl::atomic_fence(memory_order_read_acc[0],
+                                           memory_scope_acc[0]);
                        // After the fence safe non-atomic reading
                        if (write_happened) {
                          // Non-atomic read of data
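For reference, the buffer-based workaround described in the comments above can be shown as a small standalone sketch (not part of this patch, assuming a SYCL 2020 implementation; the names `order_buf`, `scope_buf`, and `data_buf` are illustrative). The host-side `memory_order`/`memory_scope` values are placed in one-element buffers so the kernel reads them at run time instead of capturing the host constants directly:

```cpp
// Standalone sketch of the workaround, not part of the patch above.
#include <sycl/sycl.hpp>

int main() {
  sycl::queue q;
  // Host-side values that should not be baked into the AOT-compiled kernel.
  sycl::memory_order order = sycl::memory_order::seq_cst;
  sycl::memory_scope scope = sycl::memory_scope::device;
  int data = 0;
  {
    // One-element buffers carry the host values to the device.
    sycl::buffer<sycl::memory_order> order_buf(&order, sycl::range<1>(1));
    sycl::buffer<sycl::memory_scope> scope_buf(&scope, sycl::range<1>(1));
    sycl::buffer<int> data_buf(&data, sycl::range<1>(1));
    q.submit([&](sycl::handler& cgh) {
      auto order_acc = order_buf.get_access<sycl::access_mode::read>(cgh);
      auto scope_acc = scope_buf.get_access<sycl::access_mode::read>(cgh);
      auto data_acc = data_buf.get_access<sycl::access_mode::write>(cgh);
      cgh.single_task([=] {
        data_acc[0] = 42;
        // The fence arguments are read from the accessors at run time.
        sycl::atomic_fence(order_acc[0], scope_acc[0]);
      });
    });
  }  // Buffer destruction waits for the kernel and writes data back.
  return data == 42 ? 0 : 1;
}
```

The CTS change applies this same pattern separately to `order_write`, `order_read`, and `MemoryScope`.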