Skip to content

Commit 43a4192

Browse files
authored
[SYCL][PI][CUDA] Update queries for atomic order and scope for CUDA (#4853)
Updates returns for atomics memory order and scope capabilities queries to make them in line with changes in #4820. This includes adding the previously not existing option to query for atomic scope capabilities.
1 parent ec29322 commit 43a4192

File tree

13 files changed

+136
-7
lines changed

13 files changed

+136
-7
lines changed

sycl/include/CL/sycl/detail/pi.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -304,12 +304,12 @@ typedef enum {
304304
PI_DEVICE_INFO_IMAGE_SRGB = 0x10027,
305305
PI_DEVICE_INFO_ATOMIC_64 = 0x10110,
306306
PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 0x10111,
307+
PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = 0x11000,
307308
PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU = 0x10112,
308309
PI_EXT_ONEAPI_DEVICE_INFO_MAX_GLOBAL_WORK_GROUPS = 0x20000,
309310
PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_1D = 0x20001,
310311
PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_2D = 0x20002,
311312
PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D = 0x20003
312-
313313
} _pi_device_info;
314314

315315
typedef enum {
@@ -330,7 +330,8 @@ typedef enum {
330330
PI_CONTEXT_INFO_PROPERTIES = CL_CONTEXT_PROPERTIES,
331331
PI_CONTEXT_INFO_REFERENCE_COUNT = CL_CONTEXT_REFERENCE_COUNT,
332332
// Atomics capabilities extensions
333-
PI_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 0x10010
333+
PI_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 0x10010,
334+
PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = 0x10011
334335
} _pi_context_info;
335336

336337
typedef enum {
@@ -537,6 +538,13 @@ constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_RELEASE = 0x04;
537538
constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_ACQ_REL = 0x08;
538539
constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_SEQ_CST = 0x10;
539540

541+
using pi_memory_scope_capabilities = pi_bitfield;
542+
constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_WORK_ITEM = 0x01;
543+
constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_SUB_GROUP = 0x02;
544+
constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_WORK_GROUP = 0x04;
545+
constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_DEVICE = 0x08;
546+
constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_SYSTEM = 0x10;
547+
540548
typedef enum {
541549
PI_PROFILING_INFO_COMMAND_QUEUED = CL_PROFILING_COMMAND_QUEUED,
542550
PI_PROFILING_INFO_COMMAND_SUBMIT = CL_PROFILING_COMMAND_SUBMIT,

sycl/include/CL/sycl/info/context_traits.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@ __SYCL_PARAM_TRAITS_SPEC(context, reference_count, cl_uint)
22
__SYCL_PARAM_TRAITS_SPEC(context, platform, cl::sycl::platform)
33
__SYCL_PARAM_TRAITS_SPEC(context, devices, std::vector<cl::sycl::device>)
44
__SYCL_PARAM_TRAITS_SPEC(context, atomic_memory_order_capabilities, std::vector<cl::sycl::memory_order>)
5+
__SYCL_PARAM_TRAITS_SPEC(context, atomic_memory_scope_capabilities, std::vector<cl::sycl::memory_scope>)

sycl/include/CL/sycl/info/device_traits.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ __SYCL_PARAM_TRAITS_SPEC(device, image_support, bool)
2525
__SYCL_PARAM_TRAITS_SPEC(device, atomic64, bool)
2626
__SYCL_PARAM_TRAITS_SPEC(device, atomic_memory_order_capabilities,
2727
std::vector<cl::sycl::memory_order>)
28+
__SYCL_PARAM_TRAITS_SPEC(device, atomic_memory_scope_capabilities,
29+
std::vector<cl::sycl::memory_scope>)
2830
__SYCL_PARAM_TRAITS_SPEC(device, max_read_image_args, pi_uint32)
2931
__SYCL_PARAM_TRAITS_SPEC(device, max_write_image_args, pi_uint32)
3032
__SYCL_PARAM_TRAITS_SPEC(device, image2d_max_width, size_t)

sycl/include/CL/sycl/info/info_desc.hpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ class program;
2121
class device;
2222
class platform;
2323
class kernel_id;
24+
enum class memory_scope;
2425

2526
// TODO: stop using OpenCL directly, use PI.
2627
namespace info {
@@ -44,6 +45,8 @@ enum class context : cl_context_info {
4445
devices = CL_CONTEXT_DEVICES,
4546
atomic_memory_order_capabilities =
4647
PI_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES,
48+
atomic_memory_scope_capabilities =
49+
PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES,
4750
};
4851

4952
// A.3 Device information descriptors
@@ -168,7 +171,9 @@ enum class device : cl_device_info {
168171
PI_EXT_ONEAPI_DEVICE_INFO_MAX_GLOBAL_WORK_GROUPS,
169172
ext_oneapi_max_work_groups_1d = PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_1D,
170173
ext_oneapi_max_work_groups_2d = PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_2D,
171-
ext_oneapi_max_work_groups_3d = PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D
174+
ext_oneapi_max_work_groups_3d = PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D,
175+
atomic_memory_scope_capabilities =
176+
PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES
172177
};
173178

174179
enum class device_type : pi_uint64 {

sycl/include/CL/sycl/memory_enums.hpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,22 @@ readMemoryOrderBitfield(pi_memory_order_capabilities bits) {
6363
return result;
6464
}
6565

66+
inline std::vector<memory_scope>
67+
readMemoryScopeBitfield(pi_memory_scope_capabilities bits) {
68+
std::vector<memory_scope> result;
69+
if (bits & PI_MEMORY_SCOPE_WORK_ITEM)
70+
result.push_back(memory_scope::work_item);
71+
if (bits & PI_MEMORY_SCOPE_SUB_GROUP)
72+
result.push_back(memory_scope::sub_group);
73+
if (bits & PI_MEMORY_SCOPE_WORK_GROUP)
74+
result.push_back(memory_scope::work_group);
75+
if (bits & PI_MEMORY_SCOPE_DEVICE)
76+
result.push_back(memory_scope::device);
77+
if (bits & PI_MEMORY_SCOPE_SYSTEM)
78+
result.push_back(memory_scope::system);
79+
return result;
80+
}
81+
6682
#ifndef __SYCL_DEVICE_ONLY__
6783
static constexpr std::memory_order getStdMemoryOrder(sycl::memory_order order) {
6884
switch (order) {

sycl/plugins/cuda/pi_cuda.cpp

Lines changed: 51 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -851,6 +851,33 @@ pi_result cuda_piContextGetInfo(pi_context context, pi_context_info param_name,
851851
case PI_CONTEXT_INFO_REFERENCE_COUNT:
852852
return getInfo(param_value_size, param_value, param_value_size_ret,
853853
context->get_reference_count());
854+
case PI_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: {
855+
int major = 0;
856+
cl::sycl::detail::pi::assertion(
857+
cuDeviceGetAttribute(&major,
858+
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
859+
context->get_device()->get()) == CUDA_SUCCESS);
860+
pi_memory_order_capabilities capabilities =
861+
(major >= 6) ? PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE |
862+
PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL
863+
: PI_MEMORY_ORDER_RELAXED;
864+
return getInfo(param_value_size, param_value, param_value_size_ret,
865+
capabilities);
866+
}
867+
case PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: {
868+
int major = 0;
869+
cl::sycl::detail::pi::assertion(
870+
cuDeviceGetAttribute(&major,
871+
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
872+
context->get_device()->get()) == CUDA_SUCCESS);
873+
pi_memory_order_capabilities capabilities =
874+
(major >= 5) ? PI_MEMORY_SCOPE_WORK_ITEM | PI_MEMORY_SCOPE_SUB_GROUP |
875+
PI_MEMORY_SCOPE_WORK_GROUP | PI_MEMORY_SCOPE_DEVICE |
876+
PI_MEMORY_SCOPE_SYSTEM
877+
: PI_MEMORY_SCOPE_DEVICE;
878+
return getInfo(param_value_size, param_value, param_value_size_ret,
879+
capabilities);
880+
}
854881
default:
855882
__SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name);
856883
}
@@ -1112,11 +1139,31 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name,
11121139
atomic64);
11131140
}
11141141
case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: {
1115-
// NVPTX currently only support at most monotonic atomic load/store.
1116-
// Acquire and release is present in newer PTX, but is not yet supported
1117-
// in LLVM NVPTX.
1142+
int major = 0;
1143+
cl::sycl::detail::pi::assertion(
1144+
cuDeviceGetAttribute(&major,
1145+
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
1146+
device->get()) == CUDA_SUCCESS);
1147+
pi_memory_order_capabilities capabilities =
1148+
(major >= 6) ? PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE |
1149+
PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL
1150+
: PI_MEMORY_ORDER_RELAXED;
1151+
return getInfo(param_value_size, param_value, param_value_size_ret,
1152+
capabilities);
1153+
}
1154+
case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: {
1155+
int major = 0;
1156+
cl::sycl::detail::pi::assertion(
1157+
cuDeviceGetAttribute(&major,
1158+
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
1159+
device->get()) == CUDA_SUCCESS);
1160+
pi_memory_order_capabilities capabilities =
1161+
(major >= 5) ? PI_MEMORY_SCOPE_WORK_ITEM | PI_MEMORY_SCOPE_SUB_GROUP |
1162+
PI_MEMORY_SCOPE_WORK_GROUP | PI_MEMORY_SCOPE_DEVICE |
1163+
PI_MEMORY_SCOPE_SYSTEM
1164+
: PI_MEMORY_SCOPE_DEVICE;
11181165
return getInfo(param_value_size, param_value, param_value_size_ret,
1119-
PI_MEMORY_ORDER_RELAXED);
1166+
capabilities);
11201167
}
11211168
case PI_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: {
11221169
// NVIDIA devices only support one sub-group size (the warp size)

sycl/plugins/hip/pi_hip.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -845,6 +845,7 @@ pi_result hip_piContextGetInfo(pi_context context, pi_context_info param_name,
845845
case PI_CONTEXT_INFO_REFERENCE_COUNT:
846846
return getInfo(param_value_size, param_value, param_value_size_ret,
847847
context->get_reference_count());
848+
case PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES:
848849
default:
849850
__SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name);
850851
}
@@ -1625,6 +1626,7 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name,
16251626
case PI_DEVICE_INFO_ATOMIC_64:
16261627
case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES:
16271628
// TODO: Investigate if this information is available on HIP.
1629+
case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES:
16281630
case PI_DEVICE_INFO_PCI_ADDRESS:
16291631
case PI_DEVICE_INFO_GPU_EU_COUNT:
16301632
case PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH:

sycl/plugins/level_zero/pi_level_zero.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2648,6 +2648,8 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName,
26482648
// currently not supported in level zero runtime
26492649
return PI_INVALID_VALUE;
26502650

2651+
// TODO: Implement.
2652+
case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES:
26512653
default:
26522654
zePrint("Unsupported ParamName in piGetDeviceInfo\n");
26532655
zePrint("ParamName=%d(0x%x)\n", ParamName, ParamName);
@@ -2842,6 +2844,7 @@ pi_result piContextGetInfo(pi_context Context, pi_context_info ParamName,
28422844
return ReturnValue(pi_uint32(Context->Devices.size()));
28432845
case PI_CONTEXT_INFO_REFERENCE_COUNT:
28442846
return ReturnValue(pi_uint32{Context->RefCount});
2847+
case PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES:
28452848
default:
28462849
// TODO: implement other parameters
28472850
die("piGetContextInfo: unsuppported ParamName.");

sycl/plugins/opencl/pi_opencl.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,7 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName,
275275
// TODO: Implement.
276276
case PI_DEVICE_INFO_ATOMIC_64:
277277
case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES:
278+
case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES:
278279
return PI_INVALID_VALUE;
279280
case PI_DEVICE_INFO_IMAGE_SRGB: {
280281
cl_bool result = true;

sycl/source/detail/context_impl.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,24 @@ context_impl::get_info<info::context::atomic_memory_order_capabilities>()
163163
sizeof(Result), &Result, nullptr);
164164
return readMemoryOrderBitfield(Result);
165165
}
166+
template <>
167+
std::vector<cl::sycl::memory_scope>
168+
context_impl::get_info<info::context::atomic_memory_scope_capabilities>()
169+
const {
170+
if (is_host())
171+
return {cl::sycl::memory_scope::work_item,
172+
cl::sycl::memory_scope::sub_group,
173+
cl::sycl::memory_scope::work_group, cl::sycl::memory_scope::device,
174+
cl::sycl::memory_scope::system};
175+
176+
pi_memory_scope_capabilities Result;
177+
getPlugin().call<PiApiKind::piContextGetInfo>(
178+
MContext,
179+
pi::cast<pi_context_info>(
180+
info::context::atomic_memory_scope_capabilities),
181+
sizeof(Result), &Result, nullptr);
182+
return readMemoryScopeBitfield(Result);
183+
}
166184

167185
RT::PiContext &context_impl::getHandleRef() { return MContext; }
168186
const RT::PiContext &context_impl::getHandleRef() const { return MContext; }

sycl/source/detail/device_info.hpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,21 @@ struct get_device_info<std::vector<memory_order>,
266266
}
267267
};
268268

269+
// Specialization for atomic_memory_scope_capabilities, PI returns a bitfield
270+
template <>
271+
struct get_device_info<std::vector<memory_scope>,
272+
info::device::atomic_memory_scope_capabilities> {
273+
static std::vector<memory_scope> get(RT::PiDevice dev, const plugin &Plugin) {
274+
pi_memory_scope_capabilities result;
275+
Plugin.call_nocheck<PiApiKind::piDeviceGetInfo>(
276+
dev,
277+
pi::cast<RT::PiDeviceInfo>(
278+
info::device::atomic_memory_scope_capabilities),
279+
sizeof(pi_memory_scope_capabilities), &result, nullptr);
280+
return readMemoryScopeBitfield(result);
281+
}
282+
};
283+
269284
// Specialization for exec_capabilities, OpenCL returns a bitfield
270285
template <>
271286
struct get_device_info<std::vector<info::execution_capability>,
@@ -764,6 +779,13 @@ get_device_info_host<info::device::atomic_memory_order_capabilities>() {
764779
memory_order::acq_rel, memory_order::seq_cst};
765780
}
766781

782+
template <>
783+
inline std::vector<memory_scope>
784+
get_device_info_host<info::device::atomic_memory_scope_capabilities>() {
785+
return {memory_scope::work_item, memory_scope::sub_group,
786+
memory_scope::work_group, memory_scope::device, memory_scope::system};
787+
}
788+
767789
template <>
768790
inline cl_uint get_device_info_host<info::device::max_read_image_args>() {
769791
// current value is the required minimum

sycl/test/abi/sycl_symbols_linux.dump

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4249,6 +4249,7 @@ _ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE65575EEENS3_12param_traitsIS4_XT_
42494249
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE65808EEENS3_12param_traitsIS4_XT_EE11return_typeEv
42504250
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE65809EEENS3_12param_traitsIS4_XT_EE11return_typeEv
42514251
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE65810EEENS3_12param_traitsIS4_XT_EE11return_typeEv
4252+
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE69632EEENS3_12param_traitsIS4_XT_EE11return_typeEv
42524253
_ZNK2cl4sycl6device9getNativeEv
42534254
_ZNK2cl4sycl6kernel11get_backendEv
42544255
_ZNK2cl4sycl6kernel11get_contextEv
@@ -4348,6 +4349,7 @@ _ZNK2cl4sycl7context8get_infoILNS0_4info7contextE4224EEENS3_12param_traitsIS4_XT
43484349
_ZNK2cl4sycl7context8get_infoILNS0_4info7contextE4225EEENS3_12param_traitsIS4_XT_EE11return_typeEv
43494350
_ZNK2cl4sycl7context8get_infoILNS0_4info7contextE4228EEENS3_12param_traitsIS4_XT_EE11return_typeEv
43504351
_ZNK2cl4sycl7context8get_infoILNS0_4info7contextE65552EEENS3_12param_traitsIS4_XT_EE11return_typeEv
4352+
_ZNK2cl4sycl7context8get_infoILNS0_4info7contextE65553EEENS3_12param_traitsIS4_XT_EE11return_typeEv
43514353
_ZNK2cl4sycl7context9getNativeEv
43524354
_ZNK2cl4sycl7handler14getHandlerImplEv
43534355
_ZNK2cl4sycl7handler27isStateExplicitKernelBundleEv

sycl/test/abi/sycl_symbols_windows.dump

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
??$get_info@$0BAAA@@device@sycl@cl@@QEBA?AW4device_type@info@12@XZ
2424
??$get_info@$0BAAB@@device@sycl@cl@@QEBAIXZ
2525
??$get_info@$0BAABA@@context@sycl@cl@@QEBA?AV?$vector@W4memory_order@sycl@cl@@V?$allocator@W4memory_order@sycl@cl@@@std@@@std@@XZ
26+
??$get_info@$0BAABB@@context@sycl@cl@@QEBA?AV?$vector@W4memory_scope@sycl@cl@@V?$allocator@W4memory_scope@sycl@cl@@@std@@@std@@XZ
2627
??$get_info@$0BAAC@@device@sycl@cl@@QEBAIXZ
2728
??$get_info@$0BAACA@@device@sycl@cl@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@XZ
2829
??$get_info@$0BAACB@@device@sycl@cl@@QEBAIXZ
@@ -115,6 +116,7 @@
115116
??$get_info@$0BAJA@@queue@sycl@cl@@QEBA?AVcontext@12@XZ
116117
??$get_info@$0BAJB@@queue@sycl@cl@@QEBA?AVdevice@12@XZ
117118
??$get_info@$0BAJC@@queue@sycl@cl@@QEBAIXZ
119+
??$get_info@$0BBAAA@@device@sycl@cl@@QEBA?AV?$vector@W4memory_scope@sycl@cl@@V?$allocator@W4memory_scope@sycl@cl@@@std@@@std@@XZ
118120
??$get_info@$0BBGA@@program@sycl@cl@@QEBAIXZ
119121
??$get_info@$0BBGB@@program@sycl@cl@@QEBA?AVcontext@12@XZ
120122
??$get_info@$0BBGD@@program@sycl@cl@@QEBA?AV?$vector@Vdevice@sycl@cl@@V?$allocator@Vdevice@sycl@cl@@@std@@@std@@XZ

0 commit comments

Comments
 (0)