Skip to content

Commit 988c477

Browse files
authored
[UR] In-order path for OpenCL command-buffers (#17056)
After the [spec bump of cl_khr_command_buffer to 0.9.7](https://github.com/KhronosGroup/OpenCL-Docs/), in the OpenCL adapter we no longer need to worry about the in-order/out-of-order property of the internal queue used on command-buffer creation matching the queue used to enqueue the command-buffer. We can therefore take advantage of the in-order flag passed on UR command-buffer creation to use an in-order queue for command-buffer creation, and omit using sync points. **Note:** This UR patch was previously approved and ready-to-merge prior to the UR repo move in oneapi-src/unified-runtime#2681.
1 parent 69941b8 commit 988c477

File tree

2 files changed

+42
-11
lines changed

2 files changed

+42
-11
lines changed

unified-runtime/source/adapters/opencl/command_buffer.cpp

Lines changed: 38 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
3434
ur_exp_command_buffer_handle_t *phCommandBuffer) {
3535

3636
ur_queue_handle_t Queue = nullptr;
37-
UR_RETURN_ON_FAILURE(urQueueCreate(hContext, hDevice, nullptr, &Queue));
37+
ur_queue_properties_t QueueProperties = {UR_STRUCTURE_TYPE_QUEUE_PROPERTIES,
38+
nullptr, 0};
39+
const bool IsInOrder =
40+
pCommandBufferDesc ? pCommandBufferDesc->isInOrder : false;
41+
if (!IsInOrder) {
42+
QueueProperties.flags = UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE;
43+
}
44+
UR_RETURN_ON_FAILURE(
45+
urQueueCreate(hContext, hDevice, &QueueProperties, &Queue));
3846

3947
cl_context CLContext = cl_adapter::cast<cl_context>(hContext);
4048
cl_ext::clCreateCommandBufferKHR_fn clCreateCommandBufferKHR = nullptr;
@@ -66,7 +74,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
6674

6775
try {
6876
auto URCommandBuffer = std::make_unique<ur_exp_command_buffer_handle_t_>(
69-
Queue, hContext, hDevice, CLCommandBuffer, IsUpdatable);
77+
Queue, hContext, hDevice, CLCommandBuffer, IsUpdatable, IsInOrder);
7078
*phCommandBuffer = URCommandBuffer.release();
7179
} catch (...) {
7280
return UR_RESULT_ERROR_OUT_OF_RESOURCES;
@@ -147,11 +155,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
147155

148156
cl_command_properties_khr *Properties =
149157
hCommandBuffer->IsUpdatable ? UpdateProperties : nullptr;
158+
159+
const bool IsInOrder = hCommandBuffer->IsInOrder;
160+
cl_sync_point_khr *RetSyncPoint = IsInOrder ? nullptr : pSyncPoint;
161+
const cl_sync_point_khr *SyncPointWaitList =
162+
IsInOrder ? nullptr : pSyncPointWaitList;
163+
uint32_t WaitListSize = IsInOrder ? 0 : numSyncPointsInWaitList;
150164
CL_RETURN_ON_FAILURE(clCommandNDRangeKernelKHR(
151165
hCommandBuffer->CLCommandBuffer, nullptr, Properties,
152166
cl_adapter::cast<cl_kernel>(hKernel), workDim, pGlobalWorkOffset,
153-
pGlobalWorkSize, pLocalWorkSize, numSyncPointsInWaitList,
154-
pSyncPointWaitList, pSyncPoint, OutCommandHandle));
167+
pGlobalWorkSize, pLocalWorkSize, WaitListSize, SyncPointWaitList,
168+
RetSyncPoint, OutCommandHandle));
155169

156170
try {
157171
auto Handle = std::make_unique<ur_exp_command_buffer_command_handle_t_>(
@@ -218,11 +232,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp(
218232
CLContext, cl_ext::ExtFuncPtrCache->clCommandCopyBufferKHRCache,
219233
cl_ext::CommandCopyBufferName, &clCommandCopyBufferKHR));
220234

235+
const bool IsInOrder = hCommandBuffer->IsInOrder;
236+
cl_sync_point_khr *RetSyncPoint = IsInOrder ? nullptr : pSyncPoint;
237+
const cl_sync_point_khr *SyncPointWaitList =
238+
IsInOrder ? nullptr : pSyncPointWaitList;
239+
uint32_t WaitListSize = IsInOrder ? 0 : numSyncPointsInWaitList;
221240
CL_RETURN_ON_FAILURE(clCommandCopyBufferKHR(
222241
hCommandBuffer->CLCommandBuffer, nullptr, nullptr,
223242
cl_adapter::cast<cl_mem>(hSrcMem), cl_adapter::cast<cl_mem>(hDstMem),
224-
srcOffset, dstOffset, size, numSyncPointsInWaitList, pSyncPointWaitList,
225-
pSyncPoint, nullptr));
243+
srcOffset, dstOffset, size, WaitListSize, SyncPointWaitList, RetSyncPoint,
244+
nullptr));
226245

227246
return UR_RESULT_SUCCESS;
228247
}
@@ -256,12 +275,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp(
256275
CLContext, cl_ext::ExtFuncPtrCache->clCommandCopyBufferRectKHRCache,
257276
cl_ext::CommandCopyBufferRectName, &clCommandCopyBufferRectKHR));
258277

278+
const bool IsInOrder = hCommandBuffer->IsInOrder;
279+
cl_sync_point_khr *RetSyncPoint = IsInOrder ? nullptr : pSyncPoint;
280+
const cl_sync_point_khr *SyncPointWaitList =
281+
IsInOrder ? nullptr : pSyncPointWaitList;
282+
uint32_t WaitListSize = IsInOrder ? 0 : numSyncPointsInWaitList;
259283
CL_RETURN_ON_FAILURE(clCommandCopyBufferRectKHR(
260284
hCommandBuffer->CLCommandBuffer, nullptr, nullptr,
261285
cl_adapter::cast<cl_mem>(hSrcMem), cl_adapter::cast<cl_mem>(hDstMem),
262286
OpenCLOriginRect, OpenCLDstRect, OpenCLRegion, srcRowPitch, srcSlicePitch,
263-
dstRowPitch, dstSlicePitch, numSyncPointsInWaitList, pSyncPointWaitList,
264-
pSyncPoint, nullptr));
287+
dstRowPitch, dstSlicePitch, WaitListSize, SyncPointWaitList, RetSyncPoint,
288+
nullptr));
265289

266290
return UR_RESULT_SUCCESS;
267291
}
@@ -360,10 +384,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp(
360384
CLContext, cl_ext::ExtFuncPtrCache->clCommandFillBufferKHRCache,
361385
cl_ext::CommandFillBufferName, &clCommandFillBufferKHR));
362386

387+
const bool IsInOrder = hCommandBuffer->IsInOrder;
388+
cl_sync_point_khr *RetSyncPoint = IsInOrder ? nullptr : pSyncPoint;
389+
const cl_sync_point_khr *SyncPointWaitList =
390+
IsInOrder ? nullptr : pSyncPointWaitList;
391+
uint32_t WaitListSize = IsInOrder ? 0 : numSyncPointsInWaitList;
363392
CL_RETURN_ON_FAILURE(clCommandFillBufferKHR(
364393
hCommandBuffer->CLCommandBuffer, nullptr, nullptr,
365394
cl_adapter::cast<cl_mem>(hBuffer), pPattern, patternSize, offset, size,
366-
numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, nullptr));
395+
WaitListSize, SyncPointWaitList, RetSyncPoint, nullptr));
367396

368397
return UR_RESULT_SUCCESS;
369398
}

unified-runtime/source/adapters/opencl/command_buffer.hpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ struct ur_exp_command_buffer_handle_t_ {
4646
/// Set to true if the kernel commands in the command-buffer can be updated,
4747
/// false otherwise
4848
bool IsUpdatable;
49+
/// Set to true if the command-buffer was created with an in-order queue.
50+
bool IsInOrder;
4951
/// Set to true if the command-buffer has been finalized, false otherwise
5052
bool IsFinalized;
5153
/// List of commands in the command-buffer.
@@ -58,10 +60,10 @@ struct ur_exp_command_buffer_handle_t_ {
5860
ur_context_handle_t hContext,
5961
ur_device_handle_t hDevice,
6062
cl_command_buffer_khr CLCommandBuffer,
61-
bool IsUpdatable)
63+
bool IsUpdatable, bool IsInOrder)
6264
: hInternalQueue(hQueue), hContext(hContext), hDevice(hDevice),
6365
CLCommandBuffer(CLCommandBuffer), IsUpdatable(IsUpdatable),
64-
IsFinalized(false), RefCount(0) {}
66+
IsInOrder(IsInOrder), IsFinalized(false), RefCount(0) {}
6567

6668
~ur_exp_command_buffer_handle_t_();
6769

0 commit comments

Comments
 (0)