Skip to content

Commit 12c988a

Browse files
authored
[SYCL] USM Buffer Location Properties (#5634)
Lower level runtime's usm allocation API now supports pass in of buffer location property [1] defined in OpenCL spec [2] For this feature to be accessible to users, sycl runtime now passes in usm::buffer_location property defined in sycl spec [3] into the opencl runtime calls, the property is only passed if the extension name occurs in the platform extension string. If the lower level runtime does not support such property, then the property will yield no effect. This is a temporary solution to allow user to specify which memory location the device usm allocation should be in. The full solution will require retuning an annotated_ptr that carries compile time properties for further optimization. The full solution spec is in [4] [1] https://github.com/intel/fpga-runtime-for-opencl/pull/46/files [2] https://github.com/KhronosGroup/OpenCL-Docs/blob/master/extensions/cl_intel_mem_alloc_buffer_location.asciidoc [3] #5665 [4] #5656
1 parent 788196e commit 12c988a

File tree

7 files changed

+78
-12
lines changed

7 files changed

+78
-12
lines changed

sycl/include/CL/sycl/detail/pi.h

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -592,6 +592,13 @@ constexpr pi_map_flags PI_MAP_WRITE_INVALIDATE_REGION =
592592
using pi_mem_properties = pi_bitfield;
593593
constexpr pi_mem_properties PI_MEM_PROPERTIES_CHANNEL = CL_MEM_CHANNEL_INTEL;
594594

595+
// NOTE: this is made 64-bit to match the size of cl_mem_properties_intel to
596+
// make the translation to OpenCL transparent.
597+
using pi_usm_mem_properties = pi_bitfield;
598+
constexpr pi_usm_mem_properties PI_MEM_ALLOC_FLAGS = CL_MEM_ALLOC_FLAGS_INTEL;
599+
constexpr pi_usm_mem_properties PI_MEM_USM_ALLOC_BUFFER_LOCATION =
600+
CL_MEM_ALLOC_BUFFER_LOCATION_INTEL;
601+
595602
// NOTE: queue properties are implemented this way to better support bit
596603
// manipulations
597604
using pi_queue_properties = pi_bitfield;
@@ -1609,10 +1616,6 @@ typedef enum {
16091616
PI_MEM_TYPE_SHARED = CL_MEM_TYPE_SHARED_INTEL
16101617
} _pi_usm_type;
16111618

1612-
typedef enum : pi_bitfield {
1613-
PI_MEM_ALLOC_FLAGS = CL_MEM_ALLOC_FLAGS_INTEL
1614-
} _pi_usm_mem_properties;
1615-
16161619
// Flag is used for piProgramUSMEnqueuePrefetch. PI_USM_MIGRATION_TBD0 is a
16171620
// placeholder for future developments and should not change the behaviour of
16181621
// piProgramUSMEnqueuePrefetch
@@ -1624,7 +1627,6 @@ using pi_usm_capability_query = _pi_usm_capability_query;
16241627
using pi_usm_capabilities = _pi_usm_capabilities;
16251628
using pi_mem_info = _pi_mem_info;
16261629
using pi_usm_type = _pi_usm_type;
1627-
using pi_usm_mem_properties = _pi_usm_mem_properties;
16281630
using pi_usm_migration_flags = _pi_usm_migration_flags;
16291631

16301632
/// Allocates host memory accessible by the device.

sycl/include/CL/sycl/detail/property_helper.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,8 @@ enum PropWithDataKind {
4747
ImageUseMutex = 2,
4848
ImageContextBound = 3,
4949
BufferMemChannel = 4,
50-
PropWithDataKindSize = 5
50+
AccPropBufferLocation = 5,
51+
PropWithDataKindSize = 6
5152
};
5253

5354
// Base class for dataless properties, needed to check that the type of an

sycl/include/CL/sycl/feature_test.hpp.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ namespace sycl {
6767
#define SYCL_EXT_INTEL_KERNEL_ARGS_RESTRICT 1
6868
#define SYCL_EXT_INTEL_MEM_CHANNEL_PROPERTY 1
6969
#define SYCL_EXT_INTEL_USM_ADDRESS_SPACES 1
70+
#define SYCL_EXT_INTEL_RUNTIME_BUFFER_LOCATION 1
7071
#define SYCL_EXT_ONEAPI_BACKEND_LEVEL_ZERO 1
7172
#cmakedefine01 SYCL_BUILD_PI_CUDA
7273
#if SYCL_BUILD_PI_CUDA

sycl/include/CL/sycl/usm.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <CL/sycl/detail/export.hpp>
1111
#include <CL/sycl/usm/usm_allocator.hpp>
1212
#include <CL/sycl/usm/usm_enums.hpp>
13+
#include <sycl/ext/intel/experimental/usm_properties.hpp>
1314

1415
#include <cstddef>
1516

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#pragma once
2+
3+
#include <CL/sycl/context.hpp>
4+
#include <CL/sycl/detail/property_helper.hpp>
5+
#include <CL/sycl/properties/property_traits.hpp>
6+
7+
__SYCL_INLINE_NAMESPACE(cl) {
8+
namespace sycl {
9+
namespace ext {
10+
namespace intel {
11+
namespace experimental {
12+
namespace property {
13+
namespace usm {
14+
15+
class buffer_location
16+
: public sycl::detail::PropertyWithData<
17+
sycl::detail::PropWithDataKind::AccPropBufferLocation> {
18+
public:
19+
buffer_location(uint64_t Location) : MLocation(Location) {}
20+
uint64_t get_buffer_location() const { return MLocation; }
21+
22+
private:
23+
uint64_t MLocation;
24+
};
25+
26+
} // namespace usm
27+
} // namespace property
28+
} // namespace experimental
29+
} // namespace intel
30+
} // namespace ext
31+
32+
template <>
33+
struct is_property<ext::intel::experimental::property::usm::buffer_location>
34+
: std::true_type {};
35+
36+
} // namespace sycl
37+
} // __SYCL_INLINE_NAMESPACE(cl)

sycl/source/detail/usm/usm_impl.cpp

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -93,11 +93,13 @@ void *alignedAllocHost(size_t Alignment, size_t Size, const context &Ctxt,
9393

9494
void *alignedAlloc(size_t Alignment, size_t Size, const context &Ctxt,
9595
const device &Dev, alloc Kind,
96-
const detail::code_location &CL) {
96+
const detail::code_location &CL,
97+
const property_list &PropList = {}) {
9798
XPTI_CREATE_TRACEPOINT(CL);
9899
void *RetVal = nullptr;
99100
if (Size == 0)
100101
return nullptr;
102+
101103
if (Ctxt.is_host()) {
102104
if (Kind == alloc::unknown) {
103105
RetVal = nullptr;
@@ -125,8 +127,25 @@ void *alignedAlloc(size_t Alignment, size_t Size, const context &Ctxt,
125127
switch (Kind) {
126128
case alloc::device: {
127129
Id = detail::getSyclObjImpl(Dev)->getHandleRef();
128-
Error = Plugin.call_nocheck<PiApiKind::piextUSMDeviceAlloc>(
129-
&RetVal, C, Id, nullptr, Size, Alignment);
130+
// Parse out buffer location property
131+
// Buffer location is only supported on FPGA devices
132+
bool IsBufferLocSupported =
133+
Dev.has_extension("cl_intel_mem_alloc_buffer_location");
134+
if (IsBufferLocSupported &&
135+
PropList.has_property<cl::sycl::ext::intel::experimental::property::
136+
usm::buffer_location>()) {
137+
auto location = PropList
138+
.get_property<cl::sycl::ext::intel::experimental::
139+
property::usm::buffer_location>()
140+
.get_buffer_location();
141+
pi_usm_mem_properties props[3] = {PI_MEM_USM_ALLOC_BUFFER_LOCATION,
142+
location, 0};
143+
Error = Plugin.call_nocheck<PiApiKind::piextUSMDeviceAlloc>(
144+
&RetVal, C, Id, props, Size, Alignment);
145+
} else {
146+
Error = Plugin.call_nocheck<PiApiKind::piextUSMDeviceAlloc>(
147+
&RetVal, C, Id, nullptr, Size, Alignment);
148+
}
130149
break;
131150
}
132151
case alloc::shared: {
@@ -193,8 +212,10 @@ void *malloc_device(size_t Size, const device &Dev, const context &Ctxt,
193212
}
194213

195214
void *malloc_device(size_t Size, const device &Dev, const context &Ctxt,
196-
const property_list &, const detail::code_location CL) {
197-
return malloc_device(Size, Dev, Ctxt, CL);
215+
const property_list &PropList,
216+
const detail::code_location CL) {
217+
return detail::usm::alignedAlloc(0, Size, Ctxt, Dev, alloc::device, CL,
218+
PropList);
198219
}
199220

200221
void *malloc_device(size_t Size, const queue &Q,

sycl/test/extensions/usm/usm_alloc_utility.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,10 @@ int main() {
8383
array = (int *)malloc_device(N * sizeof(int), q);
8484
check_and_free(array, dev, ctxt);
8585

86-
array = (int *)malloc_device(N * sizeof(int), q, property_list{});
86+
array = malloc_device<int>(
87+
N, q,
88+
property_list{
89+
ext::intel::experimental::property::usm::buffer_location(2)});
8790
check_and_free(array, dev, ctxt);
8891

8992
array = (int *)aligned_alloc_device(alignof(long long), N * sizeof(int),

0 commit comments

Comments
 (0)