Skip to content

Commit f84fc32

Browse files
authored
[SYCL] Add aspect for bfloat16 (#5720)
This PR adds a new aspect ext_oneapi_bfloat16 to allow a runtime check for if the device supports the bfloat16 floating point type. Only the CUDA implementation for checking if the device supports this aspect is added. Updated test: intel/llvm-test-suite#888
1 parent a310952 commit f84fc32

File tree

11 files changed

+46
-1
lines changed

11 files changed

+46
-1
lines changed

sycl/include/CL/sycl/aspects.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ enum class aspect {
5050
host_debuggable = 32,
5151
ext_intel_gpu_hw_threads_per_eu = 33,
5252
ext_oneapi_cuda_async_barrier = 34,
53+
ext_oneapi_bfloat16 = 35,
5354
};
5455

5556
} // namespace sycl

sycl/include/CL/sycl/detail/pi.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,8 @@ typedef enum {
298298
PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = 0x11000,
299299
PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU = 0x10112,
300300
PI_DEVICE_INFO_BACKEND_VERSION = 0x10113,
301+
// Return true if bfloat16 data type is supported by device
302+
PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16 = 0x1FFFF,
301303
PI_EXT_ONEAPI_DEVICE_INFO_MAX_GLOBAL_WORK_GROUPS = 0x20000,
302304
PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_1D = 0x20001,
303305
PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_2D = 0x20002,

sycl/include/CL/sycl/info/device_traits.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ __SYCL_PARAM_TRAITS_SPEC(device, atomic_memory_order_capabilities,
2727
std::vector<cl::sycl::memory_order>)
2828
__SYCL_PARAM_TRAITS_SPEC(device, atomic_memory_scope_capabilities,
2929
std::vector<cl::sycl::memory_scope>)
30+
__SYCL_PARAM_TRAITS_SPEC(device, ext_oneapi_bfloat16, bool)
3031
__SYCL_PARAM_TRAITS_SPEC(device, max_read_image_args, pi_uint32)
3132
__SYCL_PARAM_TRAITS_SPEC(device, max_write_image_args, pi_uint32)
3233
__SYCL_PARAM_TRAITS_SPEC(device, image2d_max_width, size_t)

sycl/include/CL/sycl/info/info_desc.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,8 @@ enum class device : cl_device_info {
174174
ext_oneapi_max_work_groups_2d = PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_2D,
175175
ext_oneapi_max_work_groups_3d = PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D,
176176
atomic_memory_scope_capabilities =
177-
PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES
177+
PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES,
178+
ext_oneapi_bfloat16 = PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16,
178179
};
179180

180181
enum class device_type : pi_uint64 {

sycl/plugins/cuda/pi_cuda.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1235,6 +1235,17 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name,
12351235
return getInfo(param_value_size, param_value, param_value_size_ret,
12361236
capabilities);
12371237
}
1238+
case PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16: {
1239+
int major = 0;
1240+
cl::sycl::detail::pi::assertion(
1241+
cuDeviceGetAttribute(&major,
1242+
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
1243+
device->get()) == CUDA_SUCCESS);
1244+
1245+
bool bfloat16 = (major >= 8) ? true : false;
1246+
return getInfo(param_value_size, param_value, param_value_size_ret,
1247+
bfloat16);
1248+
}
12381249
case PI_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: {
12391250
// NVIDIA devices only support one sub-group size (the warp size)
12401251
int warpSize = 0;

sycl/plugins/hip/pi_hip.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1677,6 +1677,7 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name,
16771677
case PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE:
16781678
case PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU:
16791679
case PI_DEVICE_INFO_MAX_MEM_BANDWIDTH:
1680+
case PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16:
16801681
return PI_ERROR_INVALID_VALUE;
16811682

16821683
default:

sycl/plugins/level_zero/pi_level_zero.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2925,6 +2925,8 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName,
29252925
case PI_DEVICE_INFO_MAX_MEM_BANDWIDTH:
29262926
// currently not supported in level zero runtime
29272927
return PI_ERROR_INVALID_VALUE;
2928+
case PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16:
2929+
return PI_ERROR_INVALID_VALUE;
29282930

29292931
// TODO: Implement.
29302932
case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES:

sycl/plugins/opencl/pi_opencl.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,8 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName,
230230
std::memcpy(paramValue, &result, sizeof(cl_bool));
231231
return PI_SUCCESS;
232232
}
233+
case PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16:
234+
return PI_ERROR_INVALID_VALUE;
233235
case PI_DEVICE_INFO_IMAGE_SRGB: {
234236
cl_bool result = true;
235237
std::memcpy(paramValue, &result, sizeof(cl_bool));

sycl/source/detail/device_impl.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,8 @@ bool device_impl::has(aspect Aspect) const {
276276
return has_extension("cl_khr_fp16");
277277
case aspect::fp64:
278278
return has_extension("cl_khr_fp64");
279+
case aspect::ext_oneapi_bfloat16:
280+
return get_info<info::device::ext_oneapi_bfloat16>();
279281
case aspect::int64_base_atomics:
280282
return has_extension("cl_khr_int64_base_atomics");
281283
case aspect::int64_extended_atomics:

sycl/source/detail/device_info.hpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,22 @@ struct get_device_info<std::vector<memory_scope>,
264264
}
265265
};
266266

267+
// Specialization for bf16
268+
template <> struct get_device_info<bool, info::device::ext_oneapi_bfloat16> {
269+
static bool get(RT::PiDevice dev, const plugin &Plugin) {
270+
271+
bool result = false;
272+
273+
RT::PiResult Err = Plugin.call_nocheck<PiApiKind::piDeviceGetInfo>(
274+
dev, pi::cast<RT::PiDeviceInfo>(info::device::ext_oneapi_bfloat16),
275+
sizeof(result), &result, nullptr);
276+
if (Err != PI_SUCCESS) {
277+
return false;
278+
}
279+
return result;
280+
}
281+
};
282+
267283
// Specialization for exec_capabilities, OpenCL returns a bitfield
268284
template <>
269285
struct get_device_info<std::vector<info::execution_capability>,
@@ -769,6 +785,11 @@ get_device_info_host<info::device::atomic_memory_scope_capabilities>() {
769785
memory_scope::work_group, memory_scope::device, memory_scope::system};
770786
}
771787

788+
template <>
789+
inline bool get_device_info_host<info::device::ext_oneapi_bfloat16>() {
790+
return false;
791+
}
792+
772793
template <>
773794
inline cl_uint get_device_info_host<info::device::max_read_image_args>() {
774795
// current value is the required minimum

sycl/test/abi/sycl_symbols_linux.dump

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4300,6 +4300,7 @@ _ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE65809EEENS3_12param_traitsIS4_XT_
43004300
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE65810EEENS3_12param_traitsIS4_XT_EE11return_typeEv
43014301
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE65811EEENS3_12param_traitsIS4_XT_EE11return_typeEv
43024302
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE69632EEENS3_12param_traitsIS4_XT_EE11return_typeEv
4303+
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE131071EEENS3_12param_traitsIS4_XT_EE11return_typeEv
43034304
_ZNK2cl4sycl6device9getNativeEv
43044305
_ZNK2cl4sycl6kernel11get_backendEv
43054306
_ZNK2cl4sycl6kernel11get_contextEv

0 commit comments

Comments
 (0)