Skip to content

Commit f958dce

Browse files
authored
[DOC][ABI-BREAK] Remove bfloat16 math aspect. (#13351)
This aspect is not required because bfloat16 math functions are implemented for all devices via generic implementations. This PR updates this status inline with the main bfloat16 extension/doc. --------- Signed-off-by: JackAKirk <[email protected]>
1 parent cb2efef commit f958dce

File tree

16 files changed

+23
-83
lines changed

16 files changed

+23
-83
lines changed

llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@ def AspectExt_oneapi_native_assert : Aspect<"ext_oneapi_native_assert">;
4343
def AspectHost_debuggable : Aspect<"host_debuggable">;
4444
def AspectExt_intel_gpu_hw_threads_per_eu : Aspect<"ext_intel_gpu_hw_threads_per_eu">;
4545
def AspectExt_oneapi_cuda_async_barrier : Aspect<"ext_oneapi_cuda_async_barrier">;
46-
def AspectExt_oneapi_bfloat16_math_functions : Aspect<"ext_oneapi_bfloat16_math_functions">;
4746
def AspectExt_intel_free_memory : Aspect<"ext_intel_free_memory">;
4847
def AspectExt_intel_device_id : Aspect<"ext_intel_device_id">;
4948
def AspectExt_intel_memory_clock_rate : Aspect<"ext_intel_memory_clock_rate">;
@@ -125,7 +124,7 @@ def : TargetInfo<"__TestAspectList",
125124
AspectExt_intel_max_mem_bandwidth, AspectExt_intel_mem_channel, AspectUsm_atomic_host_allocations,
126125
AspectUsm_atomic_shared_allocations, AspectAtomic64, AspectExt_intel_device_info_uuid, AspectExt_oneapi_srgb,
127126
AspectExt_oneapi_native_assert, AspectHost_debuggable, AspectExt_intel_gpu_hw_threads_per_eu,
128-
AspectExt_oneapi_cuda_async_barrier, AspectExt_oneapi_bfloat16_math_functions, AspectExt_intel_free_memory,
127+
AspectExt_oneapi_cuda_async_barrier, AspectExt_intel_free_memory,
129128
AspectExt_intel_device_id, AspectExt_intel_memory_clock_rate, AspectExt_intel_memory_bus_width, AspectEmulated,
130129
AspectExt_intel_legacy_image, AspectExt_oneapi_bindless_images,
131130
AspectExt_oneapi_bindless_images_shared_usm, AspectExt_oneapi_bindless_images_1d_usm, AspectExt_oneapi_bindless_images_2d_usm,
@@ -198,17 +197,17 @@ def : CudaTargetInfo<"nvidia_gpu_sm_70", !listconcat(CudaMinAspects, CudaBindles
198197
def : CudaTargetInfo<"nvidia_gpu_sm_72", !listconcat(CudaMinAspects, CudaBindlessImagesAspects, [AspectFp16, AspectAtomic64])>;
199198
def : CudaTargetInfo<"nvidia_gpu_sm_75", !listconcat(CudaMinAspects, CudaBindlessImagesAspects, [AspectFp16, AspectAtomic64])>;
200199
def : CudaTargetInfo<"nvidia_gpu_sm_80", !listconcat(CudaMinAspects, CudaBindlessImagesAspects,
201-
[AspectFp16, AspectAtomic64, AspectExt_oneapi_bfloat16_math_functions, AspectExt_oneapi_cuda_async_barrier])>;
200+
[AspectFp16, AspectAtomic64, AspectExt_oneapi_cuda_async_barrier])>;
202201
def : CudaTargetInfo<"nvidia_gpu_sm_86", !listconcat(CudaMinAspects, CudaBindlessImagesAspects,
203-
[AspectFp16, AspectAtomic64, AspectExt_oneapi_bfloat16_math_functions, AspectExt_oneapi_cuda_async_barrier])>;
202+
[AspectFp16, AspectAtomic64, AspectExt_oneapi_cuda_async_barrier])>;
204203
def : CudaTargetInfo<"nvidia_gpu_sm_87", !listconcat(CudaMinAspects, CudaBindlessImagesAspects,
205-
[AspectFp16, AspectAtomic64, AspectExt_oneapi_bfloat16_math_functions, AspectExt_oneapi_cuda_async_barrier])>;
204+
[AspectFp16, AspectAtomic64, AspectExt_oneapi_cuda_async_barrier])>;
206205
def : CudaTargetInfo<"nvidia_gpu_sm_89", !listconcat(CudaMinAspects, CudaBindlessImagesAspects,
207-
[AspectFp16, AspectAtomic64, AspectExt_oneapi_bfloat16_math_functions, AspectExt_oneapi_cuda_async_barrier])>;
206+
[AspectFp16, AspectAtomic64, AspectExt_oneapi_cuda_async_barrier])>;
208207
def : CudaTargetInfo<"nvidia_gpu_sm_90", !listconcat(CudaMinAspects, CudaBindlessImagesAspects,
209-
[AspectFp16, AspectAtomic64, AspectExt_oneapi_bfloat16_math_functions, AspectExt_oneapi_cuda_async_barrier])>;
208+
[AspectFp16, AspectAtomic64, AspectExt_oneapi_cuda_async_barrier])>;
210209
def : CudaTargetInfo<"nvidia_gpu_sm_90a", !listconcat(CudaMinAspects, CudaBindlessImagesAspects,
211-
[AspectFp16, AspectAtomic64, AspectExt_oneapi_bfloat16_math_functions, AspectExt_oneapi_cuda_async_barrier])>;
210+
[AspectFp16, AspectAtomic64, AspectExt_oneapi_cuda_async_barrier])>;
212211

213212
//
214213
// HIP / AMDGPU device aspects

sycl/doc/design/DeviceConfigFile.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,6 @@ def AspectExt_oneapi_native_assert : Aspect<"ext_oneapi_native_assert">;
176176
def AspectHost_debuggable : Aspect<"host_debuggable">;
177177
def AspectExt_intel_gpu_hw_threads_per_eu : Aspect<"ext_intel_gpu_hw_threads_per_eu">;
178178
def AspectExt_oneapi_cuda_async_barrier : Aspect<"ext_oneapi_cuda_async_barrier">;
179-
def AspectExt_oneapi_bfloat16_math_functions : Aspect<"ext_oneapi_bfloat16_math_functions">;
180179
def AspectExt_intel_free_memory : Aspect<"ext_intel_free_memory">;
181180
def AspectExt_intel_device_id : Aspect<"ext_intel_device_id">;
182181
def AspectExt_intel_memory_clock_rate : Aspect<"ext_intel_memory_clock_rate">;

sycl/doc/extensions/experimental/sycl_ext_oneapi_bfloat16_math_functions.asciidoc

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,12 @@ The descriptions of the `fma`, `fmin`, `fmax`, `fabs`, `isnan`, `ceil`, `floor`,
6767
specification:
6868
https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#_math_functions.
6969

70+
[NOTE]
71+
The bfloat16 type is supported on all devices. DPC++ currently supports some
72+
bfloat16 type math functions natively on Intel Xe HP GPUs and Nvidia GPUs with
73+
Compute Capability >= SM80. On other devices, and in host code, such functions
74+
are emulated in software.
75+
7076
== Specification
7177

7278
=== Feature test macro
@@ -86,21 +92,6 @@ supports.
8692
|1 |The APIs of this experimental extension are not versioned, so the feature-test macro always has this value.
8793
|===
8894

89-
=== Extension to `enum class aspect`
90-
91-
[source]
92-
----
93-
namespace sycl {
94-
enum class aspect {
95-
...
96-
sycl_ext_oneapi_bfloat16_math_functions
97-
}
98-
}
99-
----
100-
101-
If a SYCL device has the `sycl_ext_oneapi_bfloat16_math_functions` aspect,
102-
then it supports the `bfloat16` math functions described in the next section.
103-
10495
=== Math Functions
10596

10697
==== isnan

sycl/include/sycl/device_aspect_macros.hpp

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -183,11 +183,6 @@
183183
#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_cuda_async_barrier__ 0
184184
#endif
185185

186-
#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bfloat16_math_functions__
187-
// __SYCL_ASPECT(ext_oneapi_bfloat16_math_functions, 35)
188-
#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bfloat16_math_functions__ 0
189-
#endif
190-
191186
#ifndef __SYCL_ALL_DEVICES_HAVE_ext_intel_free_memory__
192187
// __SYCL_ASPECT(ext_intel_free_memory, 36)
193188
#define __SYCL_ALL_DEVICES_HAVE_ext_intel_free_memory__ 0
@@ -561,11 +556,6 @@
561556
#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_cuda_async_barrier__ 0
562557
#endif
563558

564-
#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bfloat16_math_functions__
565-
// __SYCL_ASPECT(ext_oneapi_bfloat16_math_functions, 35)
566-
#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bfloat16_math_functions__ 0
567-
#endif
568-
569559
#ifndef __SYCL_ANY_DEVICE_HAS_ext_intel_free_memory__
570560
// __SYCL_ASPECT(ext_intel_free_memory, 36)
571561
#define __SYCL_ANY_DEVICE_HAS_ext_intel_free_memory__ 0

sycl/include/sycl/info/aspects.def

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ __SYCL_ASPECT(ext_oneapi_native_assert, 31)
2929
__SYCL_ASPECT(host_debuggable, 32)
3030
__SYCL_ASPECT(ext_intel_gpu_hw_threads_per_eu, 33)
3131
__SYCL_ASPECT(ext_oneapi_cuda_async_barrier, 34)
32-
__SYCL_ASPECT(ext_oneapi_bfloat16_math_functions, 35)
3332
__SYCL_ASPECT(ext_intel_free_memory, 36)
3433
__SYCL_ASPECT(ext_intel_device_id, 37)
3534
__SYCL_ASPECT(ext_intel_memory_clock_rate, 38)

sycl/include/sycl/info/device_traits.def

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -202,8 +202,6 @@ __SYCL_PARAM_TRAITS_SPEC(device, ext_oneapi_srgb, bool,
202202
PI_DEVICE_INFO_IMAGE_SRGB)
203203
__SYCL_PARAM_TRAITS_SPEC(device, ext_intel_mem_channel, bool,
204204
PI_EXT_INTEL_DEVICE_INFO_MEM_CHANNEL_SUPPORT)
205-
__SYCL_PARAM_TRAITS_SPEC(device, ext_oneapi_bfloat16_math_functions, bool,
206-
PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS)
207205

208206
//Deprecated oneapi/intel extension
209207
//TODO:Remove when possible

sycl/source/detail/device_impl.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -346,8 +346,6 @@ bool device_impl::has(aspect Aspect) const {
346346
return has_extension("cl_khr_fp16");
347347
case aspect::fp64:
348348
return has_extension("cl_khr_fp64");
349-
case aspect::ext_oneapi_bfloat16_math_functions:
350-
return get_info<info::device::ext_oneapi_bfloat16_math_functions>();
351349
case aspect::int64_base_atomics:
352350
return has_extension("cl_khr_int64_base_atomics");
353351
case aspect::int64_extended_atomics:

sycl/source/detail/device_info.hpp

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -301,25 +301,6 @@ struct get_device_info_impl<std::vector<memory_scope>,
301301
}
302302
};
303303

304-
// Specialization for bf16 math functions
305-
template <>
306-
struct get_device_info_impl<bool,
307-
info::device::ext_oneapi_bfloat16_math_functions> {
308-
static bool get(const DeviceImplPtr &Dev) {
309-
bool result = false;
310-
311-
sycl::detail::pi::PiResult Err =
312-
Dev->getPlugin()->call_nocheck<PiApiKind::piDeviceGetInfo>(
313-
Dev->getHandleRef(),
314-
PiInfoCode<info::device::ext_oneapi_bfloat16_math_functions>::value,
315-
sizeof(result), &result, nullptr);
316-
if (Err != PI_SUCCESS) {
317-
return false;
318-
}
319-
return result;
320-
}
321-
};
322-
323304
// Specialization for exec_capabilities, OpenCL returns a bitfield
324305
template <>
325306
struct get_device_info_impl<std::vector<info::execution_capability>,

sycl/test-e2e/BFloat16/bfloat16_conversions.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
// UNSUPPORTED: accelerator
77

88
// FIXME: enable opaque pointers support on CPU.
9-
// UNSUPPORTED: cpu
109

1110
//==---------- bfloat16_conversions.cpp - SYCL bfloat16 type test ---------==//
1211
//

sycl/test-e2e/BFloat16/bfloat16_type.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
// UNSUPPORTED: accelerator
88

99
// FIXME: enable opaque pointers support on CPU.
10-
// UNSUPPORTED: cpu
1110

1211
//==----------- bfloat16_type.cpp - SYCL bfloat16 type test ----------------==//
1312
//

0 commit comments

Comments
 (0)