Skip to content

Commit 5987a9b

Browse files
Simonsays095karturov
authored and committed
gpu: ocl: remove fallback atomic function path
1 parent 90451cc commit 5987a9b

File tree

1 file changed

+8
-26
lines changed

1 file changed

+8
-26
lines changed

src/gpu/ocl/ocl_math_utils.h

Lines changed: 8 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -355,42 +355,24 @@ DECLARE_MMAD_EMU(mmad8x8_bf16, bf16_dot2, 8, 8, short8, int8, float8)
355355
#endif
356356

357357
// Atomics
358-
#if __OPENCL_C_VERSION__ >= 200
359358
#define DECLARE_ATOMIC_OP(op, type) \
360359
type __attribute__((overloadable)) CONCAT3(atomic_, op, _global)( \
361360
volatile global CONCAT2(atomic_, type) * source, type operand) { \
362361
return CONCAT3(atomic_fetch_, op, _explicit)( \
363362
source, operand, memory_order_relaxed); \
364363
}
365-
366-
#if defined(cl_intel_global_float_atomics) \
367-
|| (defined(cl_ext_float_atomics) \
368-
&& defined(__opencl_c_ext_fp32_global_atomic_add))
364+
#if __OPENCL_C_VERSION__ >= 200
365+
// Float atomic operations require:
366+
// 1. The cl_ext_float_atomics extension (for all float functions)
367+
// 2. The __opencl_c_ext_fp32_global_atomic_add feature (for float add/sub)
368+
// 3. The __opencl_c_ext_fp32_global_atomic_min_max feature (for float min/max)
369+
// All Intel GPUs should support these on up-to-date drivers, for all archs
370+
// gen9 and later
369371
DECLARE_ATOMIC_OP(add, float)
370372
DECLARE_ATOMIC_OP(sub, float)
371-
#else // float atomics
372-
inline float atomic_add_global(
373-
volatile __global atomic_float *source, float operand) {
374-
float old_val = atomic_load_explicit(
375-
source, memory_order_relaxed, memory_scope_device);
376-
bool success = false;
377-
do {
378-
float new_val = old_val + operand;
379-
success = atomic_compare_exchange_strong_explicit(source, &old_val,
380-
new_val, memory_order_acq_rel, memory_order_relaxed,
381-
memory_scope_device);
382-
} while (!success);
383-
return old_val;
384-
}
385-
#endif
386373

387-
#if defined(cl_intel_global_float_atomics) \
388-
|| (defined(cl_ext_float_atomics) \
389-
&& defined(__opencl_c_ext_fp32_global_atomic_min_max))
390374
DECLARE_ATOMIC_OP(min, float)
391375
DECLARE_ATOMIC_OP(max, float)
392376
#endif
393377

394-
#endif
395-
396-
#endif
378+
#endif

0 commit comments

Comments
 (0)