Skip to content

Commit 4f13cd1

Browse files
authored
Merge pull request #1282 from al42and/fix-sub-group-size-info
Fix size confusion for several device property queries
2 parents 4f10526 + 2c3096a commit 4f13cd1

File tree

4 files changed: 22 additions (+22) and 12 deletions (-12).

source/adapters/cuda/device.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -215,7 +215,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
215215
int Major = 0;
216216
UR_CHECK_ERROR(cuDeviceGetAttribute(
217217
&Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, hDevice->get()));
218-
uint64_t Capabilities =
218+
ur_memory_scope_capability_flags_t Capabilities =
219219
(Major >= 7) ? UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM |
220220
UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP |
221221
UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP |
@@ -270,7 +270,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
270270
int WarpSize = 0;
271271
UR_CHECK_ERROR(cuDeviceGetAttribute(
272272
&WarpSize, CU_DEVICE_ATTRIBUTE_WARP_SIZE, hDevice->get()));
273-
size_t Sizes[1] = {static_cast<size_t>(WarpSize)};
273+
uint32_t Sizes[1] = {static_cast<uint32_t>(WarpSize)};
274274
return ReturnValue(Sizes, 1);
275275
}
276276
case UR_DEVICE_INFO_MAX_CLOCK_FREQUENCY: {

source/adapters/hip/device.cpp

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -190,7 +190,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
190190
int WarpSize = 0;
191191
UR_CHECK_ERROR(hipDeviceGetAttribute(&WarpSize, hipDeviceAttributeWarpSize,
192192
hDevice->get()));
193-
size_t Sizes[1] = {static_cast<size_t>(WarpSize)};
193+
uint32_t Sizes[1] = {static_cast<uint32_t>(WarpSize)};
194194
return ReturnValue(Sizes, 1);
195195
}
196196
case UR_DEVICE_INFO_MAX_CLOCK_FREQUENCY: {
@@ -792,9 +792,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
792792
// Because scopes are hierarchical, wider scopes support all narrower
793793
// scopes. At a minimum, each device must support WORK_ITEM, SUB_GROUP and
794794
// WORK_GROUP. (https://github.com/KhronosGroup/SYCL-Docs/pull/382)
795-
uint64_t Capabilities = UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM |
796-
UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP |
797-
UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP;
795+
ur_memory_scope_capability_flags_t Capabilities =
796+
UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM |
797+
UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP |
798+
UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP;
798799
return ReturnValue(Capabilities);
799800
}
800801
case UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: {

source/adapters/level_zero/device.cpp

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -626,11 +626,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(
626626
return ReturnValue(static_cast<ur_bool_t>(false));
627627
}
628628
case UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: {
629-
// ze_device_compute_properties.subGroupSizes is in uint32_t whereas the
630-
// expected return is size_t datatype. size_t can be 8 bytes of data.
631-
return ReturnValue.template operator()<size_t>(
632-
Device->ZeDeviceComputeProperties->subGroupSizes,
633-
Device->ZeDeviceComputeProperties->numSubGroupSizes);
629+
return ReturnValue(Device->ZeDeviceComputeProperties->subGroupSizes,
630+
Device->ZeDeviceComputeProperties->numSubGroupSizes);
634631
}
635632
case UR_DEVICE_INFO_IL_VERSION: {
636633
// Set to a space separated list of IL version strings of the form

source/adapters/opencl/device.cpp

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -910,7 +910,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
910910
case UR_EXT_DEVICE_INFO_OPENCL_C_VERSION:
911911
case UR_DEVICE_INFO_BUILT_IN_KERNELS:
912912
case UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES:
913-
case UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL:
914913
case UR_DEVICE_INFO_IP_VERSION: {
915914
/* We can just use the OpenCL outputs because the sizes of OpenCL types
916915
* are the same as UR.
@@ -929,6 +928,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
929928

930929
return UR_RESULT_SUCCESS;
931930
}
931+
case UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: {
932+
// Have to convert size_t to uint32_t
933+
size_t SubGroupSizesSize = 0;
934+
CL_RETURN_ON_FAILURE(
935+
clGetDeviceInfo(cl_adapter::cast<cl_device_id>(hDevice), CLPropName, 0,
936+
nullptr, &SubGroupSizesSize));
937+
std::vector<size_t> SubGroupSizes(SubGroupSizesSize / sizeof(size_t));
938+
CL_RETURN_ON_FAILURE(
939+
clGetDeviceInfo(cl_adapter::cast<cl_device_id>(hDevice), CLPropName,
940+
SubGroupSizesSize, SubGroupSizes.data(), nullptr));
941+
return ReturnValue.template operator()<uint32_t>(SubGroupSizes.data(),
942+
SubGroupSizes.size());
943+
}
932944
case UR_DEVICE_INFO_EXTENSIONS: {
933945
cl_device_id Dev = cl_adapter::cast<cl_device_id>(hDevice);
934946
size_t ExtSize = 0;

0 commit comments

Comments (0)