Skip to content

Commit 02e7339

Browse files
Modified spec wording and made creating an event branchless on CUDA and HIP
1 parent 02cc1e3 commit 02e7339

File tree

8 files changed

+21
-12
lines changed

8 files changed

+21
-12
lines changed

unified-runtime/include/ur_api.h

Lines changed: 4 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

unified-runtime/scripts/core/exp-command-buffer.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,7 @@ returns:
310310
- $X_RESULT_ERROR_OUT_OF_HOST_MEMORY
311311
--- #--------------------------------------------------------------------------
312312
type: function
313-
desc: "Decrement the command-buffer object's reference count and delete the command-buffer object if the reference count becomes zero. It will try synchronizing the command-buffer, hence it is legal to call it while command-buffer is still executing."
313+
desc: "Decrement the command-buffer object's reference count and delete the command-buffer object if the reference count becomes zero. It is legal to call this entry-point while hCommandBuffer is still executing; if the reference count of hCommandBuffer becomes zero, the call blocks until that execution has completed."
314314
class: $xCommandBuffer
315315
name: ReleaseExp
316316
params:

unified-runtime/source/adapters/cuda/command_buffer.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1172,10 +1172,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCommandBufferExp(
11721172
// Launch graph
11731173
UR_CHECK_ERROR(cuGraphLaunch(hCommandBuffer->CudaGraphExec, CuStream));
11741174

1175+
UR_CHECK_ERROR(RetImplEvent->record());
1176+
hCommandBuffer->CurrentExecution = RetImplEvent.release();
11751177
if (phEvent) {
1176-
UR_CHECK_ERROR(RetImplEvent->record());
1177-
*phEvent = RetImplEvent.release();
1178-
hCommandBuffer->CurrentExecution = *phEvent;
1178+
*phEvent = hCommandBuffer->CurrentExecution;
11791179
}
11801180
return UR_RESULT_SUCCESS;
11811181
} catch (ur_result_t Err) {

unified-runtime/source/adapters/cuda/command_buffer.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ struct ur_exp_command_buffer_handle_t_ : ur::cuda::handle_base {
192192
// Atomic variable counting the number of references to this command_buffer
193193
// using std::atomic prevents data race when incrementing/decrementing.
194194
std::atomic_uint32_t RefCount;
195-
// The event of current graph execution.
195+
// Track the event of the current graph execution.
196196
ur_event_handle_t CurrentExecution = nullptr;
197197

198198
// Ordered map of sync_points to ur_events, so that we can find the last

unified-runtime/source/adapters/hip/command_buffer.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -811,10 +811,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCommandBufferExp(
811811
// Launch graph
812812
UR_CHECK_ERROR(hipGraphLaunch(hCommandBuffer->HIPGraphExec, HIPStream));
813813

814+
UR_CHECK_ERROR(RetImplEvent->record());
815+
hCommandBuffer->CurrentExecution = RetImplEvent.release();
814816
if (phEvent) {
815-
UR_CHECK_ERROR(RetImplEvent->record());
816-
*phEvent = RetImplEvent.release();
817-
hCommandBuffer->CurrentExecution = *phEvent;
817+
*phEvent = hCommandBuffer->CurrentExecution;
818818
}
819819
} catch (ur_result_t Err) {
820820
return Err;

unified-runtime/source/adapters/hip/command_buffer.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ struct ur_exp_command_buffer_handle_t_ : ur::hip::handle_base {
128128
// Atomic variable counting the number of references to this command_buffer
129129
// using std::atomic prevents data race when incrementing/decrementing.
130130
std::atomic_uint32_t RefCount;
131-
// The event of current graph execution.
131+
// Track the event of the current graph execution.
132132
ur_event_handle_t CurrentExecution = nullptr;
133133

134134
// Ordered map of sync_points to ur_events

unified-runtime/source/loader/ur_libapi.cpp

Lines changed: 4 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

unified-runtime/source/ur_api.cpp

Lines changed: 4 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)