Skip to content

Commit dddaf7f

Browse files
[SYCL][L0] Add memory access hint to piKernelSetArgMemObj (#9752)
Signed-off-by: Tikhomirova, Kseniya <[email protected]>
1 parent fc5db6b commit dddaf7f

File tree

13 files changed

+262
-37
lines changed

13 files changed

+262
-37
lines changed

sycl/include/sycl/detail/pi.h

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -92,11 +92,13 @@
9292
// 12.30 Added PI_EXT_INTEL_DEVICE_INFO_MEM_CHANNEL_SUPPORT device info query.
9393
// 12.31 Added PI_EXT_CODEPLAY_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP device
9494
// info query.
95-
// 12.32 Removed backwards compatibility of piextQueueCreateWithNativeHandle and
95+
// 13.32 Removed backwards compatibility of piextQueueCreateWithNativeHandle and
9696
// piextQueueGetNativeHandle
97+
// 14.33 Added new parameter (memory object properties) to
98+
// piextKernelSetArgMemObj
9799

98-
#define _PI_H_VERSION_MAJOR 13
99-
#define _PI_H_VERSION_MINOR 32
100+
#define _PI_H_VERSION_MAJOR 14
101+
#define _PI_H_VERSION_MINOR 33
100102

101103
#define _PI_STRING_HELPER(a) #a
102104
#define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b)
@@ -1709,13 +1711,38 @@ __SYCL_EXPORT pi_result piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj,
17091711
const pi_event *event_wait_list,
17101712
pi_event *event);
17111713

1714+
#ifndef PI_BIT
1715+
#define PI_BIT(_i) (1 << _i)
1716+
#endif // PI_BIT
1717+
1718+
typedef enum {
1719+
PI_ACCESS_READ_WRITE = PI_BIT(0),
1720+
PI_ACCESS_WRITE_ONLY = PI_BIT(1),
1721+
PI_ACCESS_READ_ONLY = PI_BIT(2)
1722+
} _pi_mem_obj_access;
1723+
using pi_mem_obj_access = _pi_mem_obj_access;
1724+
typedef uint32_t pi_mem_access_flag;
1725+
1726+
typedef enum {
1727+
PI_KERNEL_ARG_MEM_OBJ_ACCESS = 27,
1728+
PI_ENUM_FORCE_UINT32 = 0x7fffffff
1729+
} _pi_mem_obj_property_type;
1730+
using pi_mem_obj_property_type = _pi_mem_obj_property_type;
1731+
1732+
typedef struct {
1733+
pi_mem_obj_property_type type;
1734+
void *pNext;
1735+
pi_mem_access_flag mem_access;
1736+
} _pi_mem_obj_property;
1737+
using pi_mem_obj_property = _pi_mem_obj_property;
1738+
17121739
// Extension to allow backends to process a PI memory object before adding it
17131740
// as an argument for a kernel.
17141741
// Note: This is needed by the CUDA backend to extract the device pointer to
17151742
// the memory as the kernels uses it rather than the PI object itself.
1716-
__SYCL_EXPORT pi_result piextKernelSetArgMemObj(pi_kernel kernel,
1717-
pi_uint32 arg_index,
1718-
const pi_mem *arg_value);
1743+
__SYCL_EXPORT pi_result piextKernelSetArgMemObj(
1744+
pi_kernel kernel, pi_uint32 arg_index,
1745+
const pi_mem_obj_property *arg_properties, const pi_mem *arg_value);
17191746

17201747
// Extension to allow backends to process a PI sampler object before adding it
17211748
// as an argument for a kernel.

sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1409,7 +1409,8 @@ pi_result piKernelSetArg(pi_kernel, pi_uint32, size_t, const void *) {
14091409
DIE_NO_IMPLEMENTATION;
14101410
}
14111411

1412-
pi_result piextKernelSetArgMemObj(pi_kernel, pi_uint32, const pi_mem *) {
1412+
pi_result piextKernelSetArgMemObj(pi_kernel, pi_uint32,
1413+
const pi_mem_obj_property *, const pi_mem *) {
14131414
DIE_NO_IMPLEMENTATION;
14141415
}
14151416

sycl/plugins/hip/pi_hip.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2007,9 +2007,10 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name,
20072007
sycl::detail::pi::assertion(
20082008
hipDeviceGetPCIBusId(AddressBuffer, AddressBufferSize, device->get()) ==
20092009
hipSuccess);
2010-
// A typical PCI address is 12 bytes + \0: "1234:67:90.2", but the HIP API is not
2011-
// guaranteed to use this format. In practice, it uses this format, at least
2012-
// in 5.3-5.5. To be on the safe side, we make sure the terminating \0 is set.
2010+
// A typical PCI address is 12 bytes + \0: "1234:67:90.2", but the HIP API
2011+
// is not guaranteed to use this format. In practice, it uses this format,
2012+
// at least in 5.3-5.5. To be on the safe side, we make sure the terminating
2013+
// \0 is set.
20132014
AddressBuffer[AddressBufferSize - 1] = '\0';
20142015
sycl::detail::pi::assertion(strnlen(AddressBuffer, AddressBufferSize) > 0);
20152016
return getInfoArray(strnlen(AddressBuffer, AddressBufferSize - 1) + 1,
@@ -2961,7 +2962,9 @@ pi_result hip_piKernelSetArg(pi_kernel kernel, pi_uint32 arg_index,
29612962
}
29622963

29632964
pi_result hip_piextKernelSetArgMemObj(pi_kernel kernel, pi_uint32 arg_index,
2965+
const pi_mem_obj_property *arg_properties,
29642966
const pi_mem *arg_value) {
2967+
std::ignore = arg_properties;
29652968

29662969
assert(kernel != nullptr);
29672970
assert(arg_value != nullptr);

sycl/plugins/level_zero/pi_level_zero.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -370,9 +370,10 @@ pi_result piKernelSetArg(pi_kernel Kernel, pi_uint32 ArgIndex, size_t ArgSize,
370370

371371
// Special version of piKernelSetArg to accept pi_mem.
372372
pi_result piextKernelSetArgMemObj(pi_kernel Kernel, pi_uint32 ArgIndex,
373+
const pi_mem_obj_property *ArgProperties,
373374
const pi_mem *ArgValue) {
374-
375-
return pi2ur::piextKernelSetArgMemObj(Kernel, ArgIndex, ArgValue);
375+
return pi2ur::piextKernelSetArgMemObj(Kernel, ArgIndex, ArgProperties,
376+
ArgValue);
376377
}
377378

378379
// Special version of piKernelSetArg to accept pi_sampler.

sycl/plugins/opencl/pi_opencl.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1116,7 +1116,9 @@ pi_result piSamplerCreate(pi_context context,
11161116
}
11171117

11181118
pi_result piextKernelSetArgMemObj(pi_kernel kernel, pi_uint32 arg_index,
1119+
const pi_mem_obj_property *arg_properties,
11191120
const pi_mem *arg_value) {
1121+
std::ignore = arg_properties;
11201122
return cast<pi_result>(
11211123
clSetKernelArg(cast<cl_kernel>(kernel), cast<cl_uint>(arg_index),
11221124
sizeof(arg_value), cast<const cl_mem *>(arg_value)));

sycl/plugins/unified_runtime/pi2ur.hpp

Lines changed: 35 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2003,8 +2003,10 @@ inline pi_result piextGetDeviceFunctionPointer(pi_device Device,
20032003
}
20042004

20052005
// Special version of piKernelSetArg to accept pi_mem.
2006-
inline pi_result piextKernelSetArgMemObj(pi_kernel Kernel, pi_uint32 ArgIndex,
2007-
const pi_mem *ArgValue) {
2006+
inline pi_result
2007+
piextKernelSetArgMemObj(pi_kernel Kernel, pi_uint32 ArgIndex,
2008+
const pi_mem_obj_property *ArgProperties,
2009+
const pi_mem *ArgValue) {
20082010

20092011
// TODO: the better way would probably be to add a new PI API for
20102012
// extracting native PI object from PI handle, and have SYCL
@@ -2017,21 +2019,43 @@ inline pi_result piextKernelSetArgMemObj(pi_kernel Kernel, pi_uint32 ArgIndex,
20172019
if (ArgValue)
20182020
UrMemory = reinterpret_cast<ur_mem_handle_t>(*ArgValue);
20192021

2020-
ur_kernel_arg_mem_obj_properties_t Properties{};
2021-
20222022
// We don't yet know the device where this kernel will next be run on.
20232023
// Thus we can't know the actual memory allocation that needs to be used.
20242024
// Remember the memory object being used as an argument for this kernel
20252025
// to process it later when the device is known (at the kernel enqueue).
20262026
//
2027-
// TODO: for now we have to conservatively assume the access as read-write.
2028-
// Improve that by passing SYCL buffer accessor type into
2029-
// piextKernelSetArgMemObj.
2030-
//
2031-
20322027
ur_kernel_handle_t UrKernel = reinterpret_cast<ur_kernel_handle_t>(Kernel);
2033-
HANDLE_ERRORS(
2034-
urKernelSetArgMemObj(UrKernel, ArgIndex, &Properties, UrMemory));
2028+
// the only applicable type, just ignore anything else
2029+
if (ArgProperties && ArgProperties->type == PI_KERNEL_ARG_MEM_OBJ_ACCESS) {
2030+
// following structure layout checks to be replaced with
2031+
// std::is_layout_compatible after move to C++20
2032+
static_assert(sizeof(pi_mem_obj_property) ==
2033+
sizeof(ur_kernel_arg_mem_obj_properties_t));
2034+
static_assert(sizeof(pi_mem_obj_property::type) ==
2035+
sizeof(ur_kernel_arg_mem_obj_properties_t::stype));
2036+
static_assert(sizeof(pi_mem_obj_property::pNext) ==
2037+
sizeof(ur_kernel_arg_mem_obj_properties_t::pNext));
2038+
static_assert(sizeof(pi_mem_obj_property::mem_access) ==
2039+
sizeof(ur_kernel_arg_mem_obj_properties_t::memoryAccess));
2040+
2041+
static_assert(uint32_t(PI_ACCESS_READ_WRITE) ==
2042+
uint32_t(UR_MEM_FLAG_READ_WRITE));
2043+
static_assert(uint32_t(PI_ACCESS_READ_ONLY) ==
2044+
uint32_t(UR_MEM_FLAG_READ_ONLY));
2045+
static_assert(uint32_t(PI_ACCESS_WRITE_ONLY) ==
2046+
uint32_t(UR_MEM_FLAG_WRITE_ONLY));
2047+
static_assert(uint32_t(PI_KERNEL_ARG_MEM_OBJ_ACCESS) ==
2048+
uint32_t(UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES));
2049+
2050+
const ur_kernel_arg_mem_obj_properties_t *UrMemProperties =
2051+
reinterpret_cast<const ur_kernel_arg_mem_obj_properties_t *>(
2052+
ArgProperties);
2053+
HANDLE_ERRORS(
2054+
urKernelSetArgMemObj(UrKernel, ArgIndex, UrMemProperties, UrMemory));
2055+
} else {
2056+
HANDLE_ERRORS(urKernelSetArgMemObj(UrKernel, ArgIndex, nullptr, UrMemory));
2057+
}
2058+
20352059
return PI_SUCCESS;
20362060
}
20372061

sycl/plugins/unified_runtime/pi_unified_runtime.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -150,11 +150,12 @@ __SYCL_EXPORT pi_result piKernelCreate(pi_program Program,
150150
}
151151

152152
// Special version of piKernelSetArg to accept pi_mem.
153-
__SYCL_EXPORT pi_result piextKernelSetArgMemObj(pi_kernel Kernel,
154-
pi_uint32 ArgIndex,
155-
const pi_mem *ArgValue) {
153+
__SYCL_EXPORT pi_result piextKernelSetArgMemObj(
154+
pi_kernel Kernel, pi_uint32 ArgIndex,
155+
const pi_mem_obj_property *ArgProperties, const pi_mem *ArgValue) {
156156

157-
return pi2ur::piextKernelSetArgMemObj(Kernel, ArgIndex, ArgValue);
157+
return pi2ur::piextKernelSetArgMemObj(Kernel, ArgIndex, ArgProperties,
158+
ArgValue);
158159
}
159160

160161
__SYCL_EXPORT pi_result piKernelSetArg(pi_kernel Kernel, pi_uint32 ArgIndex,

sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_kernel.cpp

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -699,9 +699,25 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj(
699699

700700
ur_mem_handle_t_ *UrMem = ur_cast<ur_mem_handle_t_ *>(ArgValue);
701701

702+
ur_mem_handle_t_::access_mode_t UrAccessMode = ur_mem_handle_t_::read_write;
703+
if (Properties) {
704+
switch (Properties->memoryAccess) {
705+
case UR_MEM_FLAG_READ_WRITE:
706+
UrAccessMode = ur_mem_handle_t_::read_write;
707+
break;
708+
case UR_MEM_FLAG_WRITE_ONLY:
709+
UrAccessMode = ur_mem_handle_t_::write_only;
710+
break;
711+
case UR_MEM_FLAG_READ_ONLY:
712+
UrAccessMode = ur_mem_handle_t_::read_only;
713+
break;
714+
default:
715+
return UR_RESULT_ERROR_INVALID_ARGUMENT;
716+
}
717+
}
702718
auto Arg = UrMem ? UrMem : nullptr;
703719
Kernel->PendingArguments.push_back(
704-
{ArgIndex, sizeof(void *), Arg, ur_mem_handle_t_::read_write});
720+
{ArgIndex, sizeof(void *), Arg, UrAccessMode});
705721

706722
return UR_RESULT_SUCCESS;
707723
}

sycl/source/detail/scheduler/commands.cpp

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2178,6 +2178,18 @@ static void ReverseRangeDimensionsForKernel(NDRDescT &NDR) {
21782178
}
21792179
}
21802180

2181+
pi_mem_obj_access AccessModeToPi(access::mode AccessorMode) {
2182+
switch (AccessorMode) {
2183+
case access::mode::read:
2184+
return PI_ACCESS_READ_ONLY;
2185+
case access::mode::write:
2186+
case access::mode::discard_write:
2187+
return PI_ACCESS_WRITE_ONLY;
2188+
default:
2189+
return PI_ACCESS_READ_WRITE;
2190+
}
2191+
}
2192+
21812193
static pi_result SetKernelParamsAndLaunch(
21822194
const QueueImplPtr &Queue, std::vector<ArgDesc> &Args,
21832195
const std::shared_ptr<device_image_impl> &DeviceImageImpl,
@@ -2212,8 +2224,11 @@ static pi_result SetKernelParamsAndLaunch(
22122224
Plugin->call<PiApiKind::piKernelSetArg>(
22132225
Kernel, NextTrueIndex, sizeof(sycl::detail::pi::PiMem), &MemArg);
22142226
} else {
2227+
pi_mem_obj_property MemObjData{};
2228+
MemObjData.mem_access = AccessModeToPi(Req->MAccessMode);
2229+
MemObjData.type = PI_KERNEL_ARG_MEM_OBJ_ACCESS;
22152230
Plugin->call<PiApiKind::piextKernelSetArgMemObj>(Kernel, NextTrueIndex,
2216-
&MemArg);
2231+
&MemObjData, &MemArg);
22172232
}
22182233
break;
22192234
}
@@ -2250,8 +2265,12 @@ static pi_result SetKernelParamsAndLaunch(
22502265
// Avoid taking an address of nullptr
22512266
sycl::detail::pi::PiMem *SpecConstsBufferArg =
22522267
SpecConstsBuffer ? &SpecConstsBuffer : nullptr;
2253-
Plugin->call<PiApiKind::piextKernelSetArgMemObj>(Kernel, NextTrueIndex,
2254-
SpecConstsBufferArg);
2268+
2269+
pi_mem_obj_property MemObjData{};
2270+
MemObjData.mem_access = PI_ACCESS_READ_ONLY;
2271+
MemObjData.type = PI_KERNEL_ARG_MEM_OBJ_ACCESS;
2272+
Plugin->call<PiApiKind::piextKernelSetArgMemObj>(
2273+
Kernel, NextTrueIndex, &MemObjData, SpecConstsBufferArg);
22552274
break;
22562275
}
22572276
case kernel_param_kind_t::kind_invalid:

sycl/unittests/buffer/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,5 @@ add_sycl_unittest(BufferTests OBJECT
33
Image.cpp
44
BufferDestructionCheck.cpp
55
MemChannel.cpp
6+
KernelArgMemObj.cpp
67
)

0 commit comments

Comments
 (0)