Skip to content

Commit 2f1f316

Browse files
authored
[SYCL] Implement SYCL_INTEL_mem_channel_property extension (#2762)
On some targets manual assignment of buffers to memory regions can improve memory bandwidth. This extension adds a buffer property to indicate in which memory channel a particular buffer should be allocated. This information is an optimization hint to the runtime and thus it is legal to ignore. Spec: #2688 Signed-off-by: Dmitry Sidorov <[email protected]>
1 parent a3bbc11 commit 2f1f316

File tree

11 files changed

+99
-26
lines changed

11 files changed

+99
-26
lines changed

sycl/include/CL/sycl/aspects.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@ enum class aspect {
3737
ext_intel_gpu_slices,
3838
ext_intel_gpu_subslices_per_slice,
3939
ext_intel_gpu_eu_count_per_subslice,
40-
ext_intel_max_mem_bandwidth
40+
ext_intel_max_mem_bandwidth,
41+
ext_intel_mem_channel
4142
};
4243

4344
} // namespace sycl

sycl/include/CL/sycl/detail/pi.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -517,9 +517,8 @@ constexpr pi_map_flags PI_MAP_WRITE_INVALIDATE_REGION =
517517

518518
// NOTE: this is made 64-bit to match the size of cl_mem_properties_intel to
519519
// make the translation to OpenCL transparent.
520-
// TODO: populate
521-
//
522520
using pi_mem_properties = pi_bitfield;
521+
constexpr pi_mem_properties PI_MEM_PROPERTIES_CHANNEL = CL_MEM_CHANNEL_INTEL;
523522

524523
// NOTE: queue properties are implemented this way to better support bit
525524
// manipulations

sycl/include/CL/sycl/detail/property_helper.hpp

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -25,22 +25,23 @@ namespace detail {
2525
// List of all dataless properties' IDs
2626
enum DataLessPropKind {
2727
BufferUseHostPtr = 0,
28-
ImageUseHostPtr,
29-
QueueEnableProfiling,
30-
InOrder,
31-
NoInit,
32-
BufferUsePinnedHostMemory,
33-
UsePrimaryContext,
34-
DataLessPropKindSize
28+
ImageUseHostPtr = 1,
29+
QueueEnableProfiling = 2,
30+
InOrder = 3,
31+
NoInit = 4,
32+
BufferUsePinnedHostMemory = 5,
33+
UsePrimaryContext = 6,
34+
DataLessPropKindSize = 7
3535
};
3636

3737
// List of all properties with data IDs
3838
enum PropWithDataKind {
3939
BufferUseMutex = 0,
40-
BufferContextBound,
41-
ImageUseMutex,
42-
ImageContextBound,
43-
PropWithDataKindSize
40+
BufferContextBound = 1,
41+
ImageUseMutex = 2,
42+
ImageContextBound = 3,
43+
BufferMemChannel = 4,
44+
PropWithDataKindSize = 5
4445
};
4546

4647
// Base class for dataless properties, needed to check that the type of an

sycl/include/CL/sycl/info/device_traits.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,3 +92,4 @@ __SYCL_PARAM_TRAITS_SPEC(device, ext_intel_gpu_slices, pi_uint32)
9292
__SYCL_PARAM_TRAITS_SPEC(device, ext_intel_gpu_subslices_per_slice, pi_uint32)
9393
__SYCL_PARAM_TRAITS_SPEC(device, ext_intel_gpu_eu_count_per_subslice, pi_uint32)
9494
__SYCL_PARAM_TRAITS_SPEC(device, ext_intel_max_mem_bandwidth, pi_uint64)
95+
__SYCL_PARAM_TRAITS_SPEC(device, ext_intel_mem_channel, bool)

sycl/include/CL/sycl/info/info_desc.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ enum class device : cl_device_info {
131131
usm_shared_allocations = PI_USM_SINGLE_SHARED_SUPPORT,
132132
usm_restricted_shared_allocations = PI_USM_CROSS_SHARED_SUPPORT,
133133
usm_system_allocator = PI_USM_SYSTEM_SHARED_SUPPORT,
134+
134135
// intel extensions
135136
ext_intel_pci_address = PI_DEVICE_INFO_PCI_ADDRESS,
136137
ext_intel_gpu_eu_count = PI_DEVICE_INFO_GPU_EU_COUNT,
@@ -139,7 +140,8 @@ enum class device : cl_device_info {
139140
ext_intel_gpu_subslices_per_slice = PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE,
140141
ext_intel_gpu_eu_count_per_subslice =
141142
PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE,
142-
ext_intel_max_mem_bandwidth = PI_DEVICE_INFO_MAX_MEM_BANDWIDTH
143+
ext_intel_max_mem_bandwidth = PI_DEVICE_INFO_MAX_MEM_BANDWIDTH,
144+
ext_intel_mem_channel = PI_MEM_PROPERTIES_CHANNEL
143145
};
144146

145147
enum class device_type : pi_uint64 {

sycl/include/CL/sycl/properties/buffer_properties.hpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,17 @@ class context_bound
3939
private:
4040
sycl::context MCtx;
4141
};
42+
43+
class mem_channel : public detail::PropertyWithData<
44+
detail::PropWithDataKind::BufferMemChannel> {
45+
public:
46+
mem_channel(uint32_t Channel) : MChannel(Channel) {}
47+
uint32_t get_channel() const { return MChannel; }
48+
49+
private:
50+
uint32_t MChannel;
51+
};
52+
4253
} // namespace buffer
4354
} // namespace property
4455

sycl/plugins/opencl/pi_opencl.cpp

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,13 @@
1717
#include <CL/sycl/detail/cl.h>
1818
#include <CL/sycl/detail/pi.h>
1919

20+
#include <algorithm>
2021
#include <cassert>
2122
#include <cstring>
2223
#include <iostream>
2324
#include <limits>
2425
#include <map>
26+
#include <sstream>
2527
#include <string>
2628
#include <vector>
2729

@@ -546,22 +548,25 @@ pi_result piMemBufferCreate(pi_context context, pi_mem_flags flags, size_t size,
546548
void *host_ptr, pi_mem *ret_mem,
547549
const pi_mem_properties *properties) {
548550
pi_result ret_err = PI_INVALID_OPERATION;
549-
clCreateBufferWithPropertiesINTEL_fn FuncPtr = nullptr;
550-
551-
if (properties)
551+
if (properties) {
552+
// TODO: need to check if all properties are supported by OpenCL RT and
553+
// ignore unsupported
554+
clCreateBufferWithPropertiesINTEL_fn FuncPtr = nullptr;
552555
// First we need to look up the function pointer
553556
ret_err = getExtFuncFromContext<clCreateBufferWithPropertiesName,
554557
clCreateBufferWithPropertiesINTEL_fn>(
555558
context, &FuncPtr);
559+
if (FuncPtr) {
560+
*ret_mem = cast<pi_mem>(FuncPtr(cast<cl_context>(context), properties,
561+
cast<cl_mem_flags>(flags), size, host_ptr,
562+
cast<cl_int *>(&ret_err)));
563+
return ret_err;
564+
}
565+
}
556566

557-
if (FuncPtr)
558-
*ret_mem = cast<pi_mem>(FuncPtr(cast<cl_context>(context), properties,
559-
cast<cl_mem_flags>(flags), size, host_ptr,
560-
cast<cl_int *>(&ret_err)));
561-
else
562-
*ret_mem = cast<pi_mem>(clCreateBuffer(cast<cl_context>(context),
563-
cast<cl_mem_flags>(flags), size,
564-
host_ptr, cast<cl_int *>(&ret_err)));
567+
*ret_mem = cast<pi_mem>(clCreateBuffer(cast<cl_context>(context),
568+
cast<cl_mem_flags>(flags), size,
569+
host_ptr, cast<cl_int *>(&ret_err)));
565570
return ret_err;
566571
}
567572

sycl/source/detail/device_info.hpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -936,6 +936,11 @@ inline bool get_device_info_host<info::device::usm_system_allocator>() {
936936
return true;
937937
}
938938

939+
template <>
940+
inline bool get_device_info_host<info::device::ext_intel_mem_channel>() {
941+
return false;
942+
}
943+
939944
cl_uint get_native_vector_width(size_t idx);
940945

941946
// USM
@@ -1003,6 +1008,17 @@ template <> struct get_device_info<bool, info::device::usm_system_allocator> {
10031008
}
10041009
};
10051010

1011+
// Specialization for memory channel query
1012+
template <> struct get_device_info<bool, info::device::ext_intel_mem_channel> {
1013+
static bool get(RT::PiDevice dev, const plugin &Plugin) {
1014+
pi_mem_properties caps;
1015+
pi_result Err = Plugin.call_nocheck<PiApiKind::piDeviceGetInfo>(
1016+
dev, pi::cast<RT::PiDeviceInfo>(info::device::ext_intel_mem_channel),
1017+
sizeof(pi_mem_properties), &caps, nullptr);
1018+
return (Err != PI_SUCCESS) ? false : (caps & PI_MEM_PROPERTIES_CHANNEL);
1019+
}
1020+
};
1021+
10061022
// Specializations for intel extensions for Level Zero low-level
10071023
// detail device descriptors (not support on host).
10081024
template <>

sycl/test/abi/sycl_symbols_linux.dump

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4036,6 +4036,7 @@ _ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE4168EEENS3_12param_traitsIS4_XT_E
40364036
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE4169EEENS3_12param_traitsIS4_XT_EE11return_typeEv
40374037
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE4188EEENS3_12param_traitsIS4_XT_EE11return_typeEv
40384038
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE4189EEENS3_12param_traitsIS4_XT_EE11return_typeEv
4039+
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE16915EEENS3_12param_traitsIS4_XT_EE11return_typeEv
40394040
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE65568EEENS3_12param_traitsIS4_XT_EE11return_typeEv
40404041
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE65569EEENS3_12param_traitsIS4_XT_EE11return_typeEv
40414042
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE65570EEENS3_12param_traitsIS4_XT_EE11return_typeEv

sycl/test/basic_tests/property_list.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,23 @@ int main() {
6464
}
6565
}
6666

67+
{
68+
cl::sycl::property_list MemChannelProp{
69+
sycl_property::buffer::mem_channel(2)};
70+
if (!MemChannelProp.has_property<sycl_property::buffer::mem_channel>()) {
71+
std::cerr << "Error: property list has no property while should have."
72+
<< std::endl;
73+
Failed = true;
74+
}
75+
auto Prop =
76+
MemChannelProp.get_property<sycl_property::buffer::mem_channel>();
77+
if (Prop.get_channel() != 2) {
78+
std::cerr << "Error: mem_channel property is not equal to 2."
79+
<< std::endl;
80+
Failed = true;
81+
}
82+
}
83+
6784
std::cerr << "Test status : " << (Failed ? "FAILED" : "PASSED") << std::endl;
6885

6986
return Failed;

sycl/test/on-device/basic_tests/buffer/buffer.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,25 @@ int main() {
4040
assert(data1[i] == 0);
4141
}
4242

43+
{
44+
int data1[10] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1};
45+
{
46+
buffer<int, 1> b(data1, range<1>(10), {property::buffer::mem_channel{3}});
47+
queue myQueue;
48+
myQueue.submit([&](handler &cgh) {
49+
auto B = b.get_access<access::mode::read_write>(cgh);
50+
cgh.parallel_for<class init_a_2>(range<1>{10},
51+
[=](id<1> index) { B[index] = 0; });
52+
});
53+
assert(b.has_property<property::buffer::mem_channel>());
54+
auto prop = b.get_property<property::buffer::mem_channel>();
55+
assert(prop.get_channel() == 3 && "oops it's not 3");
56+
57+
} // Data is copied back because there is a user side shared_ptr
58+
for (int i = 0; i < 10; i++)
59+
assert(data1[i] == 0);
60+
}
61+
4362
{
4463
std::vector<int> data1(10, -1);
4564
{

0 commit comments

Comments
 (0)