Skip to content

Commit a420e7a

Browse files
[SYCL][CUDA] Fix events for enqueue commands (#2028)
This commit ensures that `piEnqueueMemBufferCopy`, `piEnqueueMemBufferFill` and other commands call both `start()` and `record()` correctly if events are requested from them. An assertion is added to ensure this always happens from now on. Some command types are also changed to reflect the actual command, for example `cuda_piEnqueueMemBufferReadRect` not using the `RECT` version of the buffer read command type. Some event parameter names are also changed to provide naming consistency throughout the code. Signed-off-by: Przemek Malon <[email protected]>
1 parent c099e47 commit a420e7a

File tree

1 file changed

+54
-40
lines changed

1 file changed

+54
-40
lines changed

sycl/plugins/cuda/pi_cuda.cpp

Lines changed: 54 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -363,7 +363,7 @@ pi_uint64 _pi_event::get_end_time() const {
363363

364364
pi_result _pi_event::record() {
365365

366-
if (is_recorded()) {
366+
if (is_recorded() || !is_started()) {
367367
return PI_INVALID_EVENT;
368368
}
369369

@@ -2074,7 +2074,7 @@ pi_result cuda_piEnqueueMemBufferRead(pi_queue command_queue, pi_mem buffer,
20742074
size_t size, void *ptr,
20752075
pi_uint32 num_events_in_wait_list,
20762076
const pi_event *event_wait_list,
2077-
pi_event *retEvent) {
2077+
pi_event *event) {
20782078

20792079
assert(buffer != nullptr);
20802080
assert(command_queue != nullptr);
@@ -2089,7 +2089,7 @@ pi_result cuda_piEnqueueMemBufferRead(pi_queue command_queue, pi_mem buffer,
20892089
retErr = cuda_piEnqueueEventsWait(command_queue, num_events_in_wait_list,
20902090
event_wait_list, nullptr);
20912091

2092-
if (retEvent) {
2092+
if (event) {
20932093
retImplEv = std::unique_ptr<_pi_event>(_pi_event::make_native(
20942094
PI_COMMAND_TYPE_MEM_BUFFER_READ, command_queue));
20952095
retImplEv->start();
@@ -2098,16 +2098,16 @@ pi_result cuda_piEnqueueMemBufferRead(pi_queue command_queue, pi_mem buffer,
20982098
retErr =
20992099
PI_CHECK_ERROR(cuMemcpyDtoHAsync(ptr, devPtr + offset, size, cuStream));
21002100

2101-
if (retEvent) {
2101+
if (event) {
21022102
retErr = retImplEv->record();
21032103
}
21042104

21052105
if (blocking_read) {
21062106
retErr = PI_CHECK_ERROR(cuStreamSynchronize(cuStream));
21072107
}
21082108

2109-
if (retEvent) {
2110-
*retEvent = retImplEv.release();
2109+
if (event) {
2110+
*event = retImplEv.release();
21112111
}
21122112

21132113
} catch (pi_result err) {
@@ -3381,7 +3381,7 @@ pi_result cuda_piEnqueueMemBufferReadRect(
33813381
const size_t *region, size_t buffer_row_pitch, size_t buffer_slice_pitch,
33823382
size_t host_row_pitch, size_t host_slice_pitch, void *ptr,
33833383
pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list,
3384-
pi_event *retEvent) {
3384+
pi_event *event) {
33853385

33863386
assert(buffer != nullptr);
33873387
assert(command_queue != nullptr);
@@ -3397,9 +3397,9 @@ pi_result cuda_piEnqueueMemBufferReadRect(
33973397
retErr = cuda_piEnqueueEventsWait(command_queue, num_events_in_wait_list,
33983398
event_wait_list, nullptr);
33993399

3400-
if (retEvent) {
3400+
if (event) {
34013401
retImplEv = std::unique_ptr<_pi_event>(_pi_event::make_native(
3402-
PI_COMMAND_TYPE_MEM_BUFFER_READ, command_queue));
3402+
PI_COMMAND_TYPE_MEM_BUFFER_READ_RECT, command_queue));
34033403
retImplEv->start();
34043404
}
34053405

@@ -3408,16 +3408,16 @@ pi_result cuda_piEnqueueMemBufferReadRect(
34083408
buffer_row_pitch, buffer_slice_pitch, ptr, CU_MEMORYTYPE_HOST,
34093409
host_offset, host_row_pitch, host_slice_pitch);
34103410

3411-
if (retEvent) {
3411+
if (event) {
34123412
retErr = retImplEv->record();
34133413
}
34143414

34153415
if (blocking_read) {
34163416
retErr = PI_CHECK_ERROR(cuStreamSynchronize(cuStream));
34173417
}
34183418

3419-
if (retEvent) {
3420-
*retEvent = retImplEv.release();
3419+
if (event) {
3420+
*event = retImplEv.release();
34213421
}
34223422

34233423
} catch (pi_result err) {
@@ -3432,7 +3432,7 @@ pi_result cuda_piEnqueueMemBufferWriteRect(
34323432
const size_t *region, size_t buffer_row_pitch, size_t buffer_slice_pitch,
34333433
size_t host_row_pitch, size_t host_slice_pitch, const void *ptr,
34343434
pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list,
3435-
pi_event *retEvent) {
3435+
pi_event *event) {
34363436

34373437
assert(buffer != nullptr);
34383438
assert(command_queue != nullptr);
@@ -3448,9 +3448,9 @@ pi_result cuda_piEnqueueMemBufferWriteRect(
34483448
retErr = cuda_piEnqueueEventsWait(command_queue, num_events_in_wait_list,
34493449
event_wait_list, nullptr);
34503450

3451-
if (retEvent) {
3451+
if (event) {
34523452
retImplEv = std::unique_ptr<_pi_event>(_pi_event::make_native(
3453-
PI_COMMAND_TYPE_MEM_BUFFER_WRITE, command_queue));
3453+
PI_COMMAND_TYPE_MEM_BUFFER_WRITE_RECT, command_queue));
34543454
retImplEv->start();
34553455
}
34563456

@@ -3459,16 +3459,16 @@ pi_result cuda_piEnqueueMemBufferWriteRect(
34593459
host_slice_pitch, &devPtr, CU_MEMORYTYPE_DEVICE, buffer_offset,
34603460
buffer_row_pitch, buffer_slice_pitch);
34613461

3462-
if (retEvent) {
3462+
if (event) {
34633463
retErr = retImplEv->record();
34643464
}
34653465

34663466
if (blocking_write) {
34673467
retErr = PI_CHECK_ERROR(cuStreamSynchronize(cuStream));
34683468
}
34693469

3470-
if (retEvent) {
3471-
*retEvent = retImplEv.release();
3470+
if (event) {
3471+
*event = retImplEv.release();
34723472
}
34733473

34743474
} catch (pi_result err) {
@@ -3487,6 +3487,8 @@ pi_result cuda_piEnqueueMemBufferCopy(pi_queue command_queue, pi_mem src_buffer,
34873487
return PI_INVALID_QUEUE;
34883488
}
34893489

3490+
std::unique_ptr<_pi_event> retImplEv{nullptr};
3491+
34903492
try {
34913493
ScopedContext active(command_queue->get_context());
34923494

@@ -3497,17 +3499,21 @@ pi_result cuda_piEnqueueMemBufferCopy(pi_queue command_queue, pi_mem src_buffer,
34973499

34983500
pi_result result;
34993501

3502+
if (event) {
3503+
retImplEv = std::unique_ptr<_pi_event>(_pi_event::make_native(
3504+
PI_COMMAND_TYPE_MEM_BUFFER_COPY, command_queue));
3505+
result = retImplEv->start();
3506+
}
3507+
35003508
auto stream = command_queue->get();
35013509
auto src = src_buffer->mem_.buffer_mem_.get() + src_offset;
35023510
auto dst = dst_buffer->mem_.buffer_mem_.get() + dst_offset;
35033511

35043512
result = PI_CHECK_ERROR(cuMemcpyDtoDAsync(dst, src, size, stream));
35053513

35063514
if (event) {
3507-
auto new_event = _pi_event::make_native(PI_COMMAND_TYPE_MEM_BUFFER_COPY,
3508-
command_queue);
3509-
new_event->record();
3510-
*event = new_event;
3515+
result = retImplEv->record();
3516+
*event = retImplEv.release();
35113517
}
35123518

35133519
return result;
@@ -3543,7 +3549,7 @@ pi_result cuda_piEnqueueMemBufferCopyRect(
35433549

35443550
if (event) {
35453551
retImplEv = std::unique_ptr<_pi_event>(_pi_event::make_native(
3546-
PI_COMMAND_TYPE_MEM_BUFFER_COPY, command_queue));
3552+
PI_COMMAND_TYPE_MEM_BUFFER_COPY_RECT, command_queue));
35473553
retImplEv->start();
35483554
}
35493555

@@ -3586,6 +3592,8 @@ pi_result cuda_piEnqueueMemBufferFill(pi_queue command_queue, pi_mem buffer,
35863592
(void)pattern_is_valid;
35873593
(void)pattern_size_is_valid;
35883594

3595+
std::unique_ptr<_pi_event> retImplEv{nullptr};
3596+
35893597
try {
35903598
ScopedContext active(command_queue->get_context());
35913599

@@ -3596,6 +3604,12 @@ pi_result cuda_piEnqueueMemBufferFill(pi_queue command_queue, pi_mem buffer,
35963604

35973605
pi_result result;
35983606

3607+
if (event) {
3608+
retImplEv = std::unique_ptr<_pi_event>(_pi_event::make_native(
3609+
PI_COMMAND_TYPE_MEM_BUFFER_FILL, command_queue));
3610+
result = retImplEv->start();
3611+
}
3612+
35993613
auto dstDevice = buffer->mem_.buffer_mem_.get() + offset;
36003614
auto stream = command_queue->get();
36013615
auto N = size / pattern_size;
@@ -3646,10 +3660,8 @@ pi_result cuda_piEnqueueMemBufferFill(pi_queue command_queue, pi_mem buffer,
36463660
}
36473661

36483662
if (event) {
3649-
auto new_event = _pi_event::make_native(PI_COMMAND_TYPE_MEM_BUFFER_FILL,
3650-
command_queue);
3651-
new_event->record();
3652-
*event = new_event;
3663+
result = retImplEv->record();
3664+
*event = retImplEv.release();
36533665
}
36543666

36553667
return result;
@@ -3971,7 +3983,7 @@ pi_result cuda_piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer,
39713983
size_t size,
39723984
pi_uint32 num_events_in_wait_list,
39733985
const pi_event *event_wait_list,
3974-
pi_event *retEvent, void **ret_map) {
3986+
pi_event *event, void **ret_map) {
39753987

39763988
assert(ret_map != nullptr);
39773989
assert(command_queue != nullptr);
@@ -3993,15 +4005,16 @@ pi_result cuda_piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer,
39934005
if ((map_flags & CL_MAP_READ) || (map_flags & CL_MAP_WRITE)) {
39944006
ret_err = cuda_piEnqueueMemBufferRead(
39954007
command_queue, buffer, blocking_map, offset, size, hostPtr,
3996-
num_events_in_wait_list, event_wait_list, retEvent);
4008+
num_events_in_wait_list, event_wait_list, event);
39974009
} else {
3998-
if (retEvent) {
4010+
if (event) {
39994011
try {
40004012
ScopedContext active(command_queue->get_context());
40014013

4002-
*retEvent = _pi_event::make_native(PI_COMMAND_TYPE_MEM_BUFFER_MAP,
4003-
command_queue);
4004-
(*retEvent)->record();
4014+
*event = _pi_event::make_native(PI_COMMAND_TYPE_MEM_BUFFER_MAP,
4015+
command_queue);
4016+
(*event)->start();
4017+
(*event)->record();
40054018
} catch (pi_result error) {
40064019
ret_err = error;
40074020
}
@@ -4018,7 +4031,7 @@ pi_result cuda_piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj,
40184031
void *mapped_ptr,
40194032
pi_uint32 num_events_in_wait_list,
40204033
const pi_event *event_wait_list,
4021-
pi_event *retEvent) {
4034+
pi_event *event) {
40224035
pi_result ret_err = PI_SUCCESS;
40234036

40244037
assert(command_queue != nullptr);
@@ -4034,15 +4047,16 @@ pi_result cuda_piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj,
40344047
command_queue, memobj, true,
40354048
memobj->mem_.buffer_mem_.get_map_offset(mapped_ptr),
40364049
memobj->mem_.buffer_mem_.get_size(), mapped_ptr,
4037-
num_events_in_wait_list, event_wait_list, retEvent);
4050+
num_events_in_wait_list, event_wait_list, event);
40384051
} else {
4039-
if (retEvent) {
4052+
if (event) {
40404053
try {
40414054
ScopedContext active(command_queue->get_context());
40424055

4043-
*retEvent = _pi_event::make_native(PI_COMMAND_TYPE_MEM_BUFFER_UNMAP,
4044-
command_queue);
4045-
(*retEvent)->record();
4056+
*event = _pi_event::make_native(PI_COMMAND_TYPE_MEM_BUFFER_UNMAP,
4057+
command_queue);
4058+
(*event)->start();
4059+
(*event)->record();
40464060
} catch (pi_result error) {
40474061
ret_err = error;
40484062
}
@@ -4155,7 +4169,7 @@ pi_result cuda_piextUSMEnqueueMemset(pi_queue queue, void *ptr, pi_int32 value,
41554169
events_waitlist, nullptr);
41564170
if (event) {
41574171
event_ptr = std::unique_ptr<_pi_event>(
4158-
_pi_event::make_native(PI_COMMAND_TYPE_MEM_BUFFER_COPY, queue));
4172+
_pi_event::make_native(PI_COMMAND_TYPE_MEM_BUFFER_FILL, queue));
41594173
event_ptr->start();
41604174
}
41614175
result = PI_CHECK_ERROR(cuMemsetD8Async(

0 commit comments

Comments
 (0)