Skip to content

Commit 396759d

Browse files
[SYCL][CUDA] Map/unmap pinned host memory (#2098)
Changes mapping of pinned host memory to return a pointer to the pinned memory itself rather than copying it to newly allocated memory. Likewise, unmapping pinned memory performs no write operation. Signed-off-by: Steffen Larsen <[email protected]>
1 parent 9a8864c commit 396759d

File tree

2 files changed

+113
-9
lines changed

2 files changed

+113
-9
lines changed

sycl/plugins/cuda/pi_cuda.cpp

Lines changed: 32 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3975,6 +3975,8 @@ pi_result cuda_piEnqueueMemImageFill(pi_queue command_queue, pi_mem image,
39753975

39763976
/// Implements mapping on the host using a BufferRead operation.
39773977
/// Mapped pointers are stored in the pi_mem object.
3978+
/// If the buffer uses pinned host memory a pointer to that memory is returned
3979+
/// and no read operation is done.
39783980
/// \TODO Untie types from OpenCL
39793981
///
39803982
pi_result cuda_piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer,
@@ -3984,11 +3986,14 @@ pi_result cuda_piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer,
39843986
pi_uint32 num_events_in_wait_list,
39853987
const pi_event *event_wait_list,
39863988
pi_event *event, void **ret_map) {
3987-
39883989
assert(ret_map != nullptr);
39893990
assert(command_queue != nullptr);
3991+
assert(buffer != nullptr);
3992+
assert(buffer->mem_type_ == _pi_mem::mem_type::buffer);
39903993

39913994
pi_result ret_err = PI_INVALID_OPERATION;
3995+
const bool is_pinned = buffer->mem_.buffer_mem_.allocMode_ ==
3996+
_pi_mem::mem_::buffer_mem_::alloc_mode::alloc_host_ptr;
39923997

39933998
// Currently no support for overlapping regions
39943999
if (buffer->mem_.buffer_mem_.get_map_ptr() != nullptr) {
@@ -4002,15 +4007,21 @@ pi_result cuda_piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer,
40024007
ret_err = PI_SUCCESS;
40034008
}
40044009

4005-
if ((map_flags & CL_MAP_READ) || (map_flags & CL_MAP_WRITE)) {
4010+
if (!is_pinned && ((map_flags & CL_MAP_READ) || (map_flags & CL_MAP_WRITE))) {
4011+
// Pinned host memory is already on host so it doesn't need to be read.
40064012
ret_err = cuda_piEnqueueMemBufferRead(
40074013
command_queue, buffer, blocking_map, offset, size, hostPtr,
40084014
num_events_in_wait_list, event_wait_list, event);
40094015
} else {
4016+
ScopedContext active(command_queue->get_context());
4017+
4018+
if (is_pinned) {
4019+
ret_err = cuda_piEnqueueEventsWait(command_queue, num_events_in_wait_list,
4020+
event_wait_list, nullptr);
4021+
}
4022+
40104023
if (event) {
40114024
try {
4012-
ScopedContext active(command_queue->get_context());
4013-
40144025
*event = _pi_event::make_native(PI_COMMAND_TYPE_MEM_BUFFER_MAP,
40154026
command_queue);
40164027
(*event)->start();
@@ -4026,6 +4037,7 @@ pi_result cuda_piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer,
40264037

40274038
/// Implements the unmap from the host, using a BufferWrite operation.
40284039
/// Requires the mapped pointer to be already registered in the given memobj.
4040+
/// If memobj uses pinned host memory, this will not do a write.
40294041
///
40304042
pi_result cuda_piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj,
40314043
void *mapped_ptr,
@@ -4037,22 +4049,33 @@ pi_result cuda_piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj,
40374049
assert(command_queue != nullptr);
40384050
assert(mapped_ptr != nullptr);
40394051
assert(memobj != nullptr);
4052+
assert(memobj->mem_type_ == _pi_mem::mem_type::buffer);
40404053
assert(memobj->mem_.buffer_mem_.get_map_ptr() != nullptr);
40414054
assert(memobj->mem_.buffer_mem_.get_map_ptr() == mapped_ptr);
40424055

4043-
if ((memobj->mem_.buffer_mem_.get_map_flags() & CL_MAP_WRITE) ||
4044-
(memobj->mem_.buffer_mem_.get_map_flags() &
4045-
CL_MAP_WRITE_INVALIDATE_REGION)) {
4056+
const bool is_pinned = memobj->mem_.buffer_mem_.allocMode_ ==
4057+
_pi_mem::mem_::buffer_mem_::alloc_mode::alloc_host_ptr;
4058+
4059+
if (!is_pinned &&
4060+
((memobj->mem_.buffer_mem_.get_map_flags() & CL_MAP_WRITE) ||
4061+
(memobj->mem_.buffer_mem_.get_map_flags() &
4062+
CL_MAP_WRITE_INVALIDATE_REGION))) {
4063+
// Pinned host memory is only on host so it doesn't need to be written to.
40464064
ret_err = cuda_piEnqueueMemBufferWrite(
40474065
command_queue, memobj, true,
40484066
memobj->mem_.buffer_mem_.get_map_offset(mapped_ptr),
40494067
memobj->mem_.buffer_mem_.get_size(), mapped_ptr,
40504068
num_events_in_wait_list, event_wait_list, event);
40514069
} else {
4070+
ScopedContext active(command_queue->get_context());
4071+
4072+
if (is_pinned) {
4073+
ret_err = cuda_piEnqueueEventsWait(command_queue, num_events_in_wait_list,
4074+
event_wait_list, nullptr);
4075+
}
4076+
40524077
if (event) {
40534078
try {
4054-
ScopedContext active(command_queue->get_context());
4055-
40564079
*event = _pi_event::make_native(PI_COMMAND_TYPE_MEM_BUFFER_UNMAP,
40574080
command_queue);
40584081
(*event)->start();

sycl/unittests/pi/cuda/test_mem_obj.cpp

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,3 +113,84 @@ TEST_F(CudaTestMemObj, piMemBufferCreateNoActiveContext) {
113113
ASSERT_EQ((plugin.call_nocheck<detail::PiApiKind::piMemRelease>(memObj)),
114114
PI_SUCCESS);
115115
}
116+
117+
// Writes a value into a buffer created with PI_MEM_FLAGS_HOST_PTR_ALLOC, maps
// the buffer with CL_MAP_READ and checks that the value is visible through the
// returned host pointer.
// NOTE(review): PI_MEM_FLAGS_HOST_PTR_ALLOC presumably selects the pinned
// host-memory allocation mode that the plugin's map path checks for - confirm
// against piMemBufferCreate.
TEST_F(CudaTestMemObj, piMemBufferPinnedMappedRead) {
118+
const size_t memSize = sizeof(int);
119+
const int value = 20;
120+
121+
// Create a queue on the fixture's context/device and sanity-check it.
pi_queue queue;
122+
ASSERT_EQ((plugin.call_nocheck<detail::PiApiKind::piQueueCreate>(
123+
context_, device_, 0, &queue)),
124+
PI_SUCCESS);
125+
ASSERT_NE(queue, nullptr);
126+
ASSERT_EQ(queue->get_context(), context_);
127+
128+
// Create a read/write buffer of one int; no user host pointer is supplied.
pi_mem memObj;
129+
ASSERT_EQ((plugin.call_nocheck<detail::PiApiKind::piMemBufferCreate>(
130+
context_, PI_MEM_FLAGS_ACCESS_RW | PI_MEM_FLAGS_HOST_PTR_ALLOC,
131+
memSize, nullptr, &memObj)),
132+
PI_SUCCESS);
133+
134+
// Populate the buffer with `value` through the regular write entry point.
ASSERT_EQ(
135+
(plugin.call_nocheck<detail::PiApiKind::piEnqueueMemBufferWrite>(
136+
queue, memObj, true, 0, sizeof(int), &value, 0, nullptr, nullptr)),
137+
PI_SUCCESS);
138+
139+
// Map the whole buffer for reading; host_ptr receives the mapped pointer.
int *host_ptr = nullptr;
140+
ASSERT_EQ((plugin.call_nocheck<detail::PiApiKind::piEnqueueMemBufferMap>(
141+
queue, memObj, true, CL_MAP_READ, 0, sizeof(int), 0, nullptr,
142+
nullptr, (void **)&host_ptr)),
143+
PI_SUCCESS);
144+
145+
// The previously written value must be readable through the mapping.
ASSERT_EQ(*host_ptr, value);
146+
147+
// Unmap using the same pointer that the map call returned.
ASSERT_EQ((plugin.call_nocheck<detail::PiApiKind::piEnqueueMemUnmap>(
148+
queue, memObj, host_ptr, 0, nullptr, nullptr)),
149+
PI_SUCCESS);
150+
151+
// Release the buffer and the queue created by this test.
ASSERT_EQ((plugin.call_nocheck<detail::PiApiKind::piMemRelease>(memObj)),
152+
PI_SUCCESS);
153+
plugin.call<detail::PiApiKind::piQueueRelease>(queue);
154+
}
155+
156+
// Maps a buffer created with PI_MEM_FLAGS_HOST_PTR_ALLOC using CL_MAP_WRITE,
// stores a value through the mapped pointer, unmaps, and checks that a
// subsequent piEnqueueMemBufferRead returns the stored value.
// NOTE(review): PI_MEM_FLAGS_HOST_PTR_ALLOC presumably selects the pinned
// host-memory allocation mode that the plugin's unmap path checks for -
// confirm against piMemBufferCreate.
TEST_F(CudaTestMemObj, piMemBufferPinnedMappedWrite) {
157+
const size_t memSize = sizeof(int);
158+
const int value = 30;
159+
160+
// Create a queue on the fixture's context/device and sanity-check it.
pi_queue queue;
161+
ASSERT_EQ((plugin.call_nocheck<detail::PiApiKind::piQueueCreate>(
162+
context_, device_, 0, &queue)),
163+
PI_SUCCESS);
164+
ASSERT_NE(queue, nullptr);
165+
ASSERT_EQ(queue->get_context(), context_);
166+
167+
// Create a read/write buffer of one int; no user host pointer is supplied.
pi_mem memObj;
168+
ASSERT_EQ((plugin.call_nocheck<detail::PiApiKind::piMemBufferCreate>(
169+
context_, PI_MEM_FLAGS_ACCESS_RW | PI_MEM_FLAGS_HOST_PTR_ALLOC,
170+
memSize, nullptr, &memObj)),
171+
PI_SUCCESS);
172+
173+
// Map the whole buffer for writing; host_ptr receives the mapped pointer.
int *host_ptr = nullptr;
174+
ASSERT_EQ((plugin.call_nocheck<detail::PiApiKind::piEnqueueMemBufferMap>(
175+
queue, memObj, true, CL_MAP_WRITE, 0, sizeof(int), 0, nullptr,
176+
nullptr, (void **)&host_ptr)),
177+
PI_SUCCESS);
178+
179+
// Store the test value directly through the mapped pointer.
*host_ptr = value;
180+
181+
// Unmap using the same pointer that the map call returned.
ASSERT_EQ((plugin.call_nocheck<detail::PiApiKind::piEnqueueMemUnmap>(
182+
queue, memObj, host_ptr, 0, nullptr, nullptr)),
183+
PI_SUCCESS);
184+
185+
// Read the buffer back through the regular read entry point; the value
// stored via the mapping must be what the read returns.
int read_value = 0;
186+
ASSERT_EQ((plugin.call_nocheck<detail::PiApiKind::piEnqueueMemBufferRead>(
187+
queue, memObj, true, 0, sizeof(int), &read_value, 0, nullptr,
188+
nullptr)),
189+
PI_SUCCESS);
190+
191+
ASSERT_EQ(read_value, value);
192+
193+
// Release the buffer and the queue created by this test.
ASSERT_EQ((plugin.call_nocheck<detail::PiApiKind::piMemRelease>(memObj)),
194+
PI_SUCCESS);
195+
plugin.call<detail::PiApiKind::piQueueRelease>(queue);
196+
}

0 commit comments

Comments
 (0)