@@ -34,7 +34,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
34
34
ur_exp_command_buffer_handle_t *phCommandBuffer) {
35
35
36
36
ur_queue_handle_t Queue = nullptr ;
37
- UR_RETURN_ON_FAILURE (urQueueCreate (hContext, hDevice, nullptr , &Queue));
37
+ ur_queue_properties_t QueueProperties = {UR_STRUCTURE_TYPE_QUEUE_PROPERTIES,
38
+ nullptr , 0 };
39
+ const bool IsInOrder =
40
+ pCommandBufferDesc ? pCommandBufferDesc->isInOrder : false ;
41
+ if (!IsInOrder) {
42
+ QueueProperties.flags = UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE;
43
+ }
44
+ UR_RETURN_ON_FAILURE (
45
+ urQueueCreate (hContext, hDevice, &QueueProperties, &Queue));
38
46
39
47
cl_context CLContext = cl_adapter::cast<cl_context>(hContext);
40
48
cl_ext::clCreateCommandBufferKHR_fn clCreateCommandBufferKHR = nullptr ;
@@ -66,7 +74,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
66
74
67
75
try {
68
76
auto URCommandBuffer = std::make_unique<ur_exp_command_buffer_handle_t_>(
69
- Queue, hContext, hDevice, CLCommandBuffer, IsUpdatable);
77
+ Queue, hContext, hDevice, CLCommandBuffer, IsUpdatable, IsInOrder );
70
78
*phCommandBuffer = URCommandBuffer.release ();
71
79
} catch (...) {
72
80
return UR_RESULT_ERROR_OUT_OF_RESOURCES;
@@ -147,11 +155,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
147
155
148
156
cl_command_properties_khr *Properties =
149
157
hCommandBuffer->IsUpdatable ? UpdateProperties : nullptr ;
158
+
159
+ const bool IsInOrder = hCommandBuffer->IsInOrder ;
160
+ cl_sync_point_khr *RetSyncPoint = IsInOrder ? nullptr : pSyncPoint;
161
+ const cl_sync_point_khr *SyncPointWaitList =
162
+ IsInOrder ? nullptr : pSyncPointWaitList;
163
+ uint32_t WaitListSize = IsInOrder ? 0 : numSyncPointsInWaitList;
150
164
CL_RETURN_ON_FAILURE (clCommandNDRangeKernelKHR (
151
165
hCommandBuffer->CLCommandBuffer , nullptr , Properties,
152
166
cl_adapter::cast<cl_kernel>(hKernel), workDim, pGlobalWorkOffset,
153
- pGlobalWorkSize, pLocalWorkSize, numSyncPointsInWaitList ,
154
- pSyncPointWaitList, pSyncPoint , OutCommandHandle));
167
+ pGlobalWorkSize, pLocalWorkSize, WaitListSize, SyncPointWaitList ,
168
+ RetSyncPoint , OutCommandHandle));
155
169
156
170
try {
157
171
auto Handle = std::make_unique<ur_exp_command_buffer_command_handle_t_>(
@@ -218,11 +232,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp(
218
232
CLContext, cl_ext::ExtFuncPtrCache->clCommandCopyBufferKHRCache ,
219
233
cl_ext::CommandCopyBufferName, &clCommandCopyBufferKHR));
220
234
235
+ const bool IsInOrder = hCommandBuffer->IsInOrder ;
236
+ cl_sync_point_khr *RetSyncPoint = IsInOrder ? nullptr : pSyncPoint;
237
+ const cl_sync_point_khr *SyncPointWaitList =
238
+ IsInOrder ? nullptr : pSyncPointWaitList;
239
+ uint32_t WaitListSize = IsInOrder ? 0 : numSyncPointsInWaitList;
221
240
CL_RETURN_ON_FAILURE (clCommandCopyBufferKHR (
222
241
hCommandBuffer->CLCommandBuffer , nullptr , nullptr ,
223
242
cl_adapter::cast<cl_mem>(hSrcMem), cl_adapter::cast<cl_mem>(hDstMem),
224
- srcOffset, dstOffset, size, numSyncPointsInWaitList, pSyncPointWaitList ,
225
- pSyncPoint, nullptr ));
243
+ srcOffset, dstOffset, size, WaitListSize, SyncPointWaitList, RetSyncPoint ,
244
+ nullptr ));
226
245
227
246
return UR_RESULT_SUCCESS;
228
247
}
@@ -256,12 +275,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp(
256
275
CLContext, cl_ext::ExtFuncPtrCache->clCommandCopyBufferRectKHRCache ,
257
276
cl_ext::CommandCopyBufferRectName, &clCommandCopyBufferRectKHR));
258
277
278
+ const bool IsInOrder = hCommandBuffer->IsInOrder ;
279
+ cl_sync_point_khr *RetSyncPoint = IsInOrder ? nullptr : pSyncPoint;
280
+ const cl_sync_point_khr *SyncPointWaitList =
281
+ IsInOrder ? nullptr : pSyncPointWaitList;
282
+ uint32_t WaitListSize = IsInOrder ? 0 : numSyncPointsInWaitList;
259
283
CL_RETURN_ON_FAILURE (clCommandCopyBufferRectKHR (
260
284
hCommandBuffer->CLCommandBuffer , nullptr , nullptr ,
261
285
cl_adapter::cast<cl_mem>(hSrcMem), cl_adapter::cast<cl_mem>(hDstMem),
262
286
OpenCLOriginRect, OpenCLDstRect, OpenCLRegion, srcRowPitch, srcSlicePitch,
263
- dstRowPitch, dstSlicePitch, numSyncPointsInWaitList, pSyncPointWaitList ,
264
- pSyncPoint, nullptr ));
287
+ dstRowPitch, dstSlicePitch, WaitListSize, SyncPointWaitList, RetSyncPoint ,
288
+ nullptr ));
265
289
266
290
return UR_RESULT_SUCCESS;
267
291
}
@@ -360,10 +384,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp(
360
384
CLContext, cl_ext::ExtFuncPtrCache->clCommandFillBufferKHRCache ,
361
385
cl_ext::CommandFillBufferName, &clCommandFillBufferKHR));
362
386
387
+ const bool IsInOrder = hCommandBuffer->IsInOrder ;
388
+ cl_sync_point_khr *RetSyncPoint = IsInOrder ? nullptr : pSyncPoint;
389
+ const cl_sync_point_khr *SyncPointWaitList =
390
+ IsInOrder ? nullptr : pSyncPointWaitList;
391
+ uint32_t WaitListSize = IsInOrder ? 0 : numSyncPointsInWaitList;
363
392
CL_RETURN_ON_FAILURE (clCommandFillBufferKHR (
364
393
hCommandBuffer->CLCommandBuffer , nullptr , nullptr ,
365
394
cl_adapter::cast<cl_mem>(hBuffer), pPattern, patternSize, offset, size,
366
- numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint , nullptr ));
395
+ WaitListSize, SyncPointWaitList, RetSyncPoint , nullptr ));
367
396
368
397
return UR_RESULT_SUCCESS;
369
398
}
0 commit comments