12
12
#include < CL/sycl/context.hpp>
13
13
#include < CL/sycl/detail/common.hpp>
14
14
#include < CL/sycl/detail/helpers.hpp>
15
+ #include < CL/sycl/detail/aligned_allocator.hpp>
15
16
#include < CL/sycl/detail/queue_impl.hpp>
16
17
#include < CL/sycl/detail/scheduler/scheduler.h>
17
18
#include < CL/sycl/handler.hpp>
@@ -37,7 +38,7 @@ class handler;
37
38
class queue ;
38
39
template <int dimentions> class id ;
39
40
template <int dimentions> class range ;
40
- using buffer_allocator = std::allocator <char >;
41
+ using buffer_allocator = aligned_allocator <char , /* alignment */ 64 >;
41
42
namespace detail {
42
43
template <typename AllocatorT> class buffer_impl {
43
44
public:
@@ -48,16 +49,15 @@ template <typename AllocatorT> class buffer_impl {
48
49
buffer_impl (void *hostData, const size_t sizeInBytes,
49
50
const property_list &propList,
50
51
AllocatorT allocator = AllocatorT())
51
- : SizeInBytes(sizeInBytes), Props(propList), MAllocator(allocator) {
52
- if (Props.has_property <property::buffer::use_host_ptr>()) {
53
- BufPtr = hostData;
54
- } else {
55
- BufData.resize (get_size ());
56
- BufPtr = reinterpret_cast <void *>(BufData.data ());
57
- if (hostData != nullptr ) {
58
- auto HostPtr = reinterpret_cast <char *>(hostData);
59
- set_final_data (HostPtr);
60
- std::copy (HostPtr, HostPtr + SizeInBytes, BufData.data ());
52
+ : SizeInBytes(sizeInBytes), Props(propList), BufPtr(hostData),
53
+ MAllocator (allocator) {
54
+ if (!Props.has_property <property::buffer::use_host_ptr>()) {
55
+ BufPtr = allocateHostMem ();
56
+ if (hostData) {
57
+ set_final_data (reinterpret_cast <char *>(hostData));
58
+ std::copy (reinterpret_cast <char *>(hostData),
59
+ reinterpret_cast <char *>(hostData) + SizeInBytes,
60
+ reinterpret_cast <char *>(BufPtr));
61
61
}
62
62
}
63
63
}
@@ -66,35 +66,33 @@ template <typename AllocatorT> class buffer_impl {
66
66
buffer_impl (const void *hostData, const size_t sizeInBytes,
67
67
const property_list &propList,
68
68
AllocatorT allocator = AllocatorT())
69
- : SizeInBytes(sizeInBytes), Props(propList), MAllocator(allocator) {
70
- if (Props.has_property <property::buffer::use_host_ptr>()) {
69
+ : SizeInBytes(sizeInBytes), Props(propList),
70
+ BufPtr(const_cast <void *>(hostData)), MAllocator(allocator) {
71
+ if (!Props.has_property <property::buffer::use_host_ptr>()) {
71
72
// TODO make this buffer read only
72
- BufPtr = const_cast <void *>(hostData);
73
- } else {
74
- BufData.resize (get_size ());
75
- BufPtr = reinterpret_cast <void *>(BufData.data ());
76
- if (hostData != nullptr ) {
77
- std::copy ((char *)hostData, (char *)hostData + SizeInBytes,
78
- BufData.data ());
79
- }
73
+ BufPtr = allocateHostMem ();
74
+ if (hostData)
75
+ std::copy (const_cast <char *>(reinterpret_cast <const char *>(hostData)),
76
+ const_cast <char *>(
77
+ reinterpret_cast <const char *>(hostData)) + SizeInBytes,
78
+ const_cast <char *>(reinterpret_cast <const char *>(BufPtr)));
80
79
}
81
80
}
82
81
83
82
template <typename T>
84
83
buffer_impl (const shared_ptr_class<T> &hostData, const size_t sizeInBytes,
85
84
const property_list &propList,
86
85
AllocatorT allocator = AllocatorT())
87
- : SizeInBytes(sizeInBytes), Props(propList), MAllocator(allocator) {
88
- if (Props.has_property <property::buffer::use_host_ptr>()) {
89
- BufPtr = hostData.get ();
90
- } else {
91
- BufData.resize (get_size ());
92
- BufPtr = reinterpret_cast <void *>(BufData.data ());
93
- if (hostData.get () != nullptr ) {
86
+ : SizeInBytes(sizeInBytes), Props(propList), BufPtr(hostData.get()),
87
+ MAllocator(allocator) {
88
+ if (!Props.has_property <property::buffer::use_host_ptr>()) {
89
+ BufPtr = allocateHostMem ();
90
+ if (hostData.get ()) {
94
91
weak_ptr_class<T> hostDataWeak = hostData;
95
92
set_final_data (hostDataWeak);
96
- std::copy ((char *)hostData.get (), (char *)hostData.get () + SizeInBytes,
97
- BufData.data ());
93
+ std::copy (reinterpret_cast <char *>(hostData.get ()),
94
+ reinterpret_cast <char *>(hostData.get ()) + SizeInBytes,
95
+ reinterpret_cast <char *>(BufPtr));
98
96
}
99
97
}
100
98
}
@@ -120,8 +118,7 @@ template <typename AllocatorT> class buffer_impl {
120
118
const size_t sizeInBytes, const property_list &propList,
121
119
AllocatorT allocator = AllocatorT())
122
120
: SizeInBytes(sizeInBytes), Props(propList), MAllocator(allocator) {
123
- BufData.resize (get_size ());
124
- BufPtr = reinterpret_cast <void *>(BufData.data ());
121
+ BufPtr = allocateHostMem ();
125
122
// We need to cast BufPtr to a pointer to the iteration type to get the correct
126
123
// offset in std::copy when it increments the destination pointer.
127
124
auto *Ptr =
@@ -137,8 +134,7 @@ template <typename AllocatorT> class buffer_impl {
137
134
const size_t sizeInBytes, const property_list &propList,
138
135
AllocatorT allocator = AllocatorT())
139
136
: SizeInBytes(sizeInBytes), Props(propList), MAllocator(allocator) {
140
- BufData.resize (get_size ());
141
- BufPtr = reinterpret_cast <void *>(BufData.data ());
137
+ BufPtr = allocateHostMem ();
142
138
// We need to cast BufPtr to a pointer to the iteration type to get the correct
143
139
// offset in std::copy when it increments the destination pointer.
144
140
typedef typename std::iterator_traits<InputIterator>::value_type value;
@@ -148,9 +144,10 @@ template <typename AllocatorT> class buffer_impl {
148
144
}
149
145
150
146
buffer_impl (cl_mem MemObject, const context &SyclContext,
151
- const size_t sizeInBytes, event AvailableEvent = {})
147
+ const size_t sizeInBytes, event AvailableEvent = {},
148
+ AllocatorT allocator = AllocatorT())
152
149
: OpenCLInterop(true ), SizeInBytes(sizeInBytes),
153
- AvailableEvent (AvailableEvent) {
150
+ AvailableEvent(AvailableEvent), MAllocator(allocator) {
154
151
if (SyclContext.is_host ())
155
152
throw cl::sycl::invalid_parameter_error (
156
153
" Creation of interoperability buffer using host context is not "
@@ -165,8 +162,7 @@ template <typename AllocatorT> class buffer_impl {
165
162
OCLState.Mem = MemObject;
166
163
CHECK_OCL_CODE (clRetainMemObject (MemObject));
167
164
168
- BufData.resize (get_size ());
169
- BufPtr = reinterpret_cast <void *>(BufData.data ());
165
+ BufPtr = allocateHostMem ();
170
166
}
171
167
172
168
// Returns the buffer size in bytes, i.e. the sizeInBytes value the
// constructor stored in SizeInBytes.
size_t get_size() const {
  return SizeInBytes;
}
@@ -184,6 +180,12 @@ template <typename AllocatorT> class buffer_impl {
184
180
185
181
if (OpenCLInterop)
186
182
CHECK_OCL_CODE_NO_EXC (clReleaseMemObject (OCLState.Mem ));
183
+
184
+ if (!Props.has_property <property::buffer::use_host_ptr>()) {
185
+ if (BufPtr)
186
+ MAllocator.deallocate (reinterpret_cast <
187
+ typename AllocatorT::pointer>(BufPtr), SizeInBytes);
188
+ }
187
189
}
188
190
189
191
// Overload selected when the caller passes nullptr: resets uploadData to
// null. NOTE(review): presumably this disables copying data back to the
// user's memory when the buffer is destroyed - confirm against the destructor.
void set_final_data(std::nullptr_t) {
  uploadData = nullptr;
}
@@ -252,6 +254,13 @@ template <typename AllocatorT> class buffer_impl {
252
254
accessOffset);
253
255
}
254
256
257
+ inline void *allocateHostMem () {
258
+ size_t AllocatorValueSize = sizeof (typename AllocatorT::value_type);
259
+ size_t AllocationSize = get_size () / AllocatorValueSize;
260
+ AllocationSize += (get_size () % AllocatorValueSize) ? 1 : 0 ;
261
+ return MAllocator.allocate (AllocationSize);
262
+ }
263
+
255
264
template <typename propertyT> bool has_property () const {
256
265
return Props.has_property <propertyT>();
257
266
}
@@ -311,7 +320,6 @@ template <typename AllocatorT> class buffer_impl {
311
320
event AvailableEvent;
312
321
cl_context OpenCLContext = nullptr ;
313
322
void *BufPtr = nullptr ;
314
- vector_class<byte> BufData;
315
323
// TODO: enable support of cl_mem objects from multiple contexts
316
324
// TODO: at the current moment, using a buffer on multiple devices
317
325
// or on a device and a host simultaneously is not supported (the
0 commit comments