@@ -3975,6 +3975,8 @@ pi_result cuda_piEnqueueMemImageFill(pi_queue command_queue, pi_mem image,
3975
3975
3976
3976
// / Implements mapping on the host using a BufferRead operation.
3977
3977
// / Mapped pointers are stored in the pi_mem object.
3978
+ // / If the buffer uses pinned host memory, a pointer to that memory is returned
3979
+ // / and no read operation is done.
3978
3980
// / \TODO Untie types from OpenCL
3979
3981
// /
3980
3982
pi_result cuda_piEnqueueMemBufferMap (pi_queue command_queue, pi_mem buffer,
@@ -3984,11 +3986,14 @@ pi_result cuda_piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer,
3984
3986
pi_uint32 num_events_in_wait_list,
3985
3987
const pi_event *event_wait_list,
3986
3988
pi_event *event, void **ret_map) {
3987
-
3988
3989
assert (ret_map != nullptr );
3989
3990
assert (command_queue != nullptr );
3991
+ assert (buffer != nullptr );
3992
+ assert (buffer->mem_type_ == _pi_mem::mem_type::buffer);
3990
3993
3991
3994
pi_result ret_err = PI_INVALID_OPERATION;
3995
+ const bool is_pinned = buffer->mem_ .buffer_mem_ .allocMode_ ==
3996
+ _pi_mem::mem_::buffer_mem_::alloc_mode::alloc_host_ptr;
3992
3997
3993
3998
// Currently no support for overlapping regions
3994
3999
if (buffer->mem_ .buffer_mem_ .get_map_ptr () != nullptr ) {
@@ -4002,15 +4007,21 @@ pi_result cuda_piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer,
4002
4007
ret_err = PI_SUCCESS;
4003
4008
}
4004
4009
4005
- if ((map_flags & CL_MAP_READ) || (map_flags & CL_MAP_WRITE)) {
4010
+ if (!is_pinned && ((map_flags & CL_MAP_READ) || (map_flags & CL_MAP_WRITE))) {
4011
+ // Pinned host memory is already on host, so it doesn't need to be read.
4006
4012
ret_err = cuda_piEnqueueMemBufferRead (
4007
4013
command_queue, buffer, blocking_map, offset, size, hostPtr,
4008
4014
num_events_in_wait_list, event_wait_list, event);
4009
4015
} else {
4016
+ ScopedContext active (command_queue->get_context ());
4017
+
4018
+ if (is_pinned) {
4019
+ ret_err = cuda_piEnqueueEventsWait (command_queue, num_events_in_wait_list,
4020
+ event_wait_list, nullptr );
4021
+ }
4022
+
4010
4023
if (event) {
4011
4024
try {
4012
- ScopedContext active (command_queue->get_context ());
4013
-
4014
4025
*event = _pi_event::make_native (PI_COMMAND_TYPE_MEM_BUFFER_MAP,
4015
4026
command_queue);
4016
4027
(*event)->start ();
@@ -4026,6 +4037,7 @@ pi_result cuda_piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer,
4026
4037
4027
4038
// / Implements the unmap from the host, using a BufferWrite operation.
4028
4039
// / Requires the mapped pointer to be already registered in the given memobj.
4040
+ // / If memobj uses pinned host memory, this will not do a write.
4029
4041
// /
4030
4042
pi_result cuda_piEnqueueMemUnmap (pi_queue command_queue, pi_mem memobj,
4031
4043
void *mapped_ptr,
@@ -4037,22 +4049,33 @@ pi_result cuda_piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj,
4037
4049
assert (command_queue != nullptr );
4038
4050
assert (mapped_ptr != nullptr );
4039
4051
assert (memobj != nullptr );
4052
+ assert (memobj->mem_type_ == _pi_mem::mem_type::buffer);
4040
4053
assert (memobj->mem_ .buffer_mem_ .get_map_ptr () != nullptr );
4041
4054
assert (memobj->mem_ .buffer_mem_ .get_map_ptr () == mapped_ptr);
4042
4055
4043
- if ((memobj->mem_ .buffer_mem_ .get_map_flags () & CL_MAP_WRITE) ||
4044
- (memobj->mem_ .buffer_mem_ .get_map_flags () &
4045
- CL_MAP_WRITE_INVALIDATE_REGION)) {
4056
+ const bool is_pinned = memobj->mem_ .buffer_mem_ .allocMode_ ==
4057
+ _pi_mem::mem_::buffer_mem_::alloc_mode::alloc_host_ptr;
4058
+
4059
+ if (!is_pinned &&
4060
+ ((memobj->mem_ .buffer_mem_ .get_map_flags () & CL_MAP_WRITE) ||
4061
+ (memobj->mem_ .buffer_mem_ .get_map_flags () &
4062
+ CL_MAP_WRITE_INVALIDATE_REGION))) {
4063
+ // Pinned host memory is only on host, so it doesn't need to be written to.
4046
4064
ret_err = cuda_piEnqueueMemBufferWrite (
4047
4065
command_queue, memobj, true ,
4048
4066
memobj->mem_ .buffer_mem_ .get_map_offset (mapped_ptr),
4049
4067
memobj->mem_ .buffer_mem_ .get_size (), mapped_ptr,
4050
4068
num_events_in_wait_list, event_wait_list, event);
4051
4069
} else {
4070
+ ScopedContext active (command_queue->get_context ());
4071
+
4072
+ if (is_pinned) {
4073
+ ret_err = cuda_piEnqueueEventsWait (command_queue, num_events_in_wait_list,
4074
+ event_wait_list, nullptr );
4075
+ }
4076
+
4052
4077
if (event) {
4053
4078
try {
4054
- ScopedContext active (command_queue->get_context ());
4055
-
4056
4079
*event = _pi_event::make_native (PI_COMMAND_TYPE_MEM_BUFFER_UNMAP,
4057
4080
command_queue);
4058
4081
(*event)->start ();
0 commit comments