Skip to content

Commit aff5fee

Browse files
committed
[SYCL] Implement SYCL_INTEL_mem_channel_property extension
On some targets manual assignment of buffers to memory regions can improve memory bandwidth. This extension adds a buffer property to indicate in which memory channel a particular buffer should be allocated. This information is an optimization hint to the runtime and thus it is legal to ignore. Spec: #2688 Signed-off-by: Dmitry Sidorov <[email protected]>
1 parent e9423ff commit aff5fee

File tree

11 files changed

+123
-15
lines changed

11 files changed

+123
-15
lines changed

sycl/include/CL/sycl/aspects.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@ enum class aspect {
3030
usm_host_allocations,
3131
usm_shared_allocations,
3232
usm_restricted_shared_allocations,
33-
usm_system_allocator
33+
usm_system_allocator,
34+
ext_intel_mem_channel
3435
};
3536

3637
} // namespace sycl

sycl/include/CL/sycl/detail/pi.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -503,9 +503,8 @@ constexpr pi_map_flags PI_MAP_WRITE_INVALIDATE_REGION =
503503

504504
// NOTE: this is made 64-bit to match the size of cl_mem_properties_intel to
505505
// make the translation to OpenCL transparent.
506-
// TODO: populate
507-
//
508506
using pi_mem_properties = pi_bitfield;
507+
constexpr pi_mem_properties PI_MEM_CHANNEL_INTEL = CL_MEM_CHANNEL_INTEL;
509508

510509
// NOTE: queue properties are implemented this way to better support bit
511510
// manipulations

sycl/include/CL/sycl/detail/property_helper.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ enum PropWithDataKind {
4040
BufferContextBound,
4141
ImageUseMutex,
4242
ImageContextBound,
43+
BufferMemChannel,
4344
PropWithDataKindSize
4445
};
4546

sycl/include/CL/sycl/info/device_traits.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,3 +85,4 @@ __SYCL_PARAM_TRAITS_SPEC(device, usm_host_allocations, bool)
8585
__SYCL_PARAM_TRAITS_SPEC(device, usm_shared_allocations, bool)
8686
__SYCL_PARAM_TRAITS_SPEC(device, usm_restricted_shared_allocations, bool)
8787
__SYCL_PARAM_TRAITS_SPEC(device, usm_system_allocator, bool)
88+
__SYCL_PARAM_TRAITS_SPEC(device, ext_intel_mem_channel, bool)

sycl/include/CL/sycl/info/info_desc.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,9 @@ enum class device : cl_device_info {
130130
usm_host_allocations = PI_USM_HOST_SUPPORT,
131131
usm_shared_allocations = PI_USM_SINGLE_SHARED_SUPPORT,
132132
usm_restricted_shared_allocations = PI_USM_CROSS_SHARED_SUPPORT,
133-
usm_system_allocator = PI_USM_SYSTEM_SHARED_SUPPORT
133+
usm_system_allocator = PI_USM_SYSTEM_SHARED_SUPPORT,
134+
135+
ext_intel_mem_channel = PI_MEM_CHANNEL_INTEL
134136
};
135137

136138
enum class device_type : pi_uint64 {

sycl/include/CL/sycl/properties/buffer_properties.hpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,17 @@ class context_bound
3939
private:
4040
sycl::context MCtx;
4141
};
42+
43+
class mem_channel : public detail::PropertyWithData<
44+
detail::PropWithDataKind::BufferMemChannel> {
45+
public:
46+
mem_channel(cl_uint Channel) : m_Channel(Channel) {}
47+
cl_uint get_channel() const { return m_Channel; }
48+
49+
private:
50+
cl_uint m_Channel;
51+
};
52+
4253
} // namespace buffer
4354
} // namespace property
4455

sycl/plugins/opencl/pi_opencl.cpp

Lines changed: 51 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -523,22 +523,62 @@ pi_result piMemBufferCreate(pi_context context, pi_mem_flags flags, size_t size,
523523
void *host_ptr, pi_mem *ret_mem,
524524
const pi_mem_properties *properties) {
525525
pi_result ret_err = PI_INVALID_OPERATION;
526-
clCreateBufferWithPropertiesINTEL_fn FuncPtr = nullptr;
527-
528-
if (properties)
526+
if (properties) {
527+
clCreateBufferWithPropertiesINTEL_fn FuncPtr = nullptr;
528+
const size_t propSize = sizeof(properties) / sizeof(pi_mem_properties);
529529
// First we need to look up the function pointer
530530
ret_err = getExtFuncFromContext<clCreateBufferWithPropertiesName,
531531
clCreateBufferWithPropertiesINTEL_fn>(
532532
context, &FuncPtr);
533+
if (FuncPtr) {
534+
std::vector<pi_mem_properties> supported(properties,
535+
properties + propSize);
536+
// Go through buffer properties. If there is one, that shall be propagated
537+
// to an OpenCL runtime - check if this property is being supported.
538+
for (auto prop = supported.begin(); prop != supported.end(); ++prop) {
539+
// Check if PI_MEM_CHANNEL_INTEL property is supported. If it's not -
540+
// just ignore it, as it's an optimization hint.
541+
if (*prop == PI_MEM_CHANNEL_INTEL) {
542+
size_t deviceCount;
543+
cl_int ret_err =
544+
clGetContextInfo(cast<cl_context>(context), CL_CONTEXT_DEVICES, 0,
545+
nullptr, &deviceCount);
546+
if (ret_err != CL_SUCCESS || deviceCount < 1)
547+
return PI_INVALID_CONTEXT;
548+
std::vector<cl_device_id> devicesInCtx(deviceCount);
549+
ret_err = clGetContextInfo(
550+
cast<cl_context>(context), CL_CONTEXT_DEVICES,
551+
deviceCount * sizeof(cl_device_id), devicesInCtx.data(), nullptr);
552+
553+
size_t retSize;
554+
ret_err = clGetDeviceInfo(devicesInCtx[0], CL_DEVICE_EXTENSIONS, 0,
555+
nullptr, &retSize);
556+
if (ret_err != CL_SUCCESS)
557+
return PI_INVALID_DEVICE;
558+
std::string extensions(retSize, '\0');
559+
ret_err = clGetDeviceInfo(devicesInCtx[0], CL_DEVICE_EXTENSIONS,
560+
retSize, &extensions[0], nullptr);
561+
if (ret_err != CL_SUCCESS)
562+
return PI_INVALID_DEVICE;
563+
564+
size_t pos = extensions.find("cl_intel_mem_channel_property");
565+
if (pos == std::string::npos)
566+
supported.erase(prop);
567+
}
568+
}
569+
if (!supported.empty()) {
570+
*ret_mem =
571+
cast<pi_mem>(FuncPtr(cast<cl_context>(context), supported.data(),
572+
cast<cl_mem_flags>(flags), size, host_ptr,
573+
cast<cl_int *>(&ret_err)));
574+
return ret_err;
575+
}
576+
}
577+
}
533578

534-
if (FuncPtr)
535-
*ret_mem = cast<pi_mem>(FuncPtr(cast<cl_context>(context), properties,
536-
cast<cl_mem_flags>(flags), size, host_ptr,
537-
cast<cl_int *>(&ret_err)));
538-
else
539-
*ret_mem = cast<pi_mem>(clCreateBuffer(cast<cl_context>(context),
540-
cast<cl_mem_flags>(flags), size,
541-
host_ptr, cast<cl_int *>(&ret_err)));
579+
*ret_mem = cast<pi_mem>(clCreateBuffer(cast<cl_context>(context),
580+
cast<cl_mem_flags>(flags), size,
581+
host_ptr, cast<cl_int *>(&ret_err)));
542582
return ret_err;
543583
}
544584

sycl/source/detail/device_info.hpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -936,6 +936,11 @@ inline bool get_device_info_host<info::device::usm_system_allocator>() {
936936
return true;
937937
}
938938

939+
template <>
940+
inline bool get_device_info_host<info::device::ext_intel_mem_channel>() {
941+
return false;
942+
}
943+
939944
cl_uint get_native_vector_width(size_t idx);
940945

941946
// USM
@@ -1003,6 +1008,17 @@ template <> struct get_device_info<bool, info::device::usm_system_allocator> {
10031008
}
10041009
};
10051010

1011+
// Specialization for memory channel query
1012+
template <> struct get_device_info<bool, info::device::ext_intel_mem_channel> {
1013+
static bool get(RT::PiDevice dev, const plugin &Plugin) {
1014+
pi_mem_properties caps;
1015+
pi_result Err = Plugin.call_nocheck<PiApiKind::piDeviceGetInfo>(
1016+
dev, pi::cast<RT::PiDeviceInfo>(info::device::ext_intel_mem_channel),
1017+
sizeof(pi_mem_properties), &caps, nullptr);
1018+
return (Err != PI_SUCCESS) ? false : (caps & PI_MEM_CHANNEL_INTEL);
1019+
}
1020+
};
1021+
10061022
} // namespace detail
10071023
} // namespace sycl
10081024
} // __SYCL_INLINE_NAMESPACE(cl)

sycl/test/abi/sycl_symbols_linux.dump

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4036,6 +4036,7 @@ _ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE4168EEENS3_12param_traitsIS4_XT_E
40364036
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE4169EEENS3_12param_traitsIS4_XT_EE11return_typeEv
40374037
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE4188EEENS3_12param_traitsIS4_XT_EE11return_typeEv
40384038
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE4189EEENS3_12param_traitsIS4_XT_EE11return_typeEv
4039+
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE16915EEENS3_12param_traitsIS4_XT_EE11return_typeEv
40394040
_ZNK2cl4sycl6device9getNativeEv
40404041
_ZNK2cl4sycl6kernel11get_contextEv
40414042
_ZNK2cl4sycl6kernel11get_programEv

sycl/test/basic_tests/property_list.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,23 @@ int main() {
6464
}
6565
}
6666

67+
{
68+
cl::sycl::property_list MemChannelProp{
69+
sycl_property::buffer::mem_channel(2)};
70+
if (!MemChannelProp.has_property<sycl_property::buffer::mem_channel>()) {
71+
std::cerr << "Error: property list has no property while should have."
72+
<< std::endl;
73+
Failed = true;
74+
}
75+
auto Prop =
76+
MemChannelProp.get_property<sycl_property::buffer::mem_channel>();
77+
if (Prop.get_channel() != 2) {
78+
std::cerr << "Error: mem_channel property is not equal to 2."
79+
<< std::endl;
80+
Failed = true;
81+
}
82+
}
83+
6784
std::cerr << "Test status : " << (Failed ? "FAILED" : "PASSED") << std::endl;
6885

6986
return Failed;

sycl/test/on-device/basic_tests/buffer/buffer.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,25 @@ int main() {
4040
assert(data1[i] == 0);
4141
}
4242

43+
{
44+
int data1[10] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1};
45+
{
46+
buffer<int, 1> b(data1, range<1>(10), {property::buffer::mem_channel{3}});
47+
queue myQueue;
48+
myQueue.submit([&](handler &cgh) {
49+
auto B = b.get_access<access::mode::read_write>(cgh);
50+
cgh.parallel_for<class init_a_2>(range<1>{10},
51+
[=](id<1> index) { B[index] = 0; });
52+
});
53+
assert(b.has_property<property::buffer::mem_channel>());
54+
auto prop = b.get_property<property::buffer::mem_channel>();
55+
assert(prop.get_channel() == 3 && "oops it's not 3");
56+
57+
} // Data is copied back because there is a user side shared_ptr
58+
for (int i = 0; i < 10; i++)
59+
assert(data1[i] == 0);
60+
}
61+
4362
{
4463
std::vector<int> data1(10, -1);
4564
{

0 commit comments

Comments
 (0)