10
10
11
11
#include " command_buffer.hpp"
12
12
#include " common.hpp"
13
+ #include " context.hpp"
14
+ #include " event.hpp"
15
+ #include " kernel.hpp"
16
+ #include " memory.hpp"
17
+ #include " queue.hpp"
13
18
14
19
/// The ur_exp_command_buffer_handle_t_ destructor calls CL release
15
20
/// command-buffer to free the underlying object.
16
21
ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_ () {
17
22
urQueueRelease (hInternalQueue);
18
23
19
- cl_context CLContext = cl_adapter::cast<cl_context>( hContext) ;
24
+ cl_context CLContext = hContext-> CLContext ;
20
25
cl_ext::clReleaseCommandBufferKHR_fn clReleaseCommandBufferKHR = nullptr ;
21
26
cl_int Res =
22
27
cl_ext::getExtFuncFromContext<decltype (clReleaseCommandBufferKHR)>(
@@ -43,7 +48,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
43
48
UR_RETURN_ON_FAILURE (
44
49
urQueueCreate (hContext, hDevice, &QueueProperties, &Queue));
45
50
46
- cl_context CLContext = cl_adapter::cast<cl_context>( hContext) ;
51
+ cl_context CLContext = hContext-> CLContext ;
47
52
cl_ext::clCreateCommandBufferKHR_fn clCreateCommandBufferKHR = nullptr ;
48
53
UR_RETURN_ON_FAILURE (
49
54
cl_ext::getExtFuncFromContext<decltype (clCreateCommandBufferKHR)>(
@@ -53,7 +58,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
53
58
const bool IsUpdatable = pCommandBufferDesc->isUpdatable ;
54
59
55
60
ur_device_command_buffer_update_capability_flags_t UpdateCapabilities;
56
- cl_device_id CLDevice = cl_adapter::cast<cl_device_id>( hDevice) ;
61
+ cl_device_id CLDevice = hDevice-> CLDevice ;
57
62
CL_RETURN_ON_FAILURE (
58
63
getDeviceCommandBufferUpdateCapabilities (CLDevice, UpdateCapabilities));
59
64
bool DeviceSupportsUpdate = UpdateCapabilities > 0 ;
@@ -67,16 +72,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
67
72
IsUpdatable ? CL_COMMAND_BUFFER_MUTABLE_KHR : 0u , 0 };
68
73
69
74
cl_int Res = CL_SUCCESS;
70
- auto CLCommandBuffer = clCreateCommandBufferKHR (
71
- 1 , cl_adapter::cast<cl_command_queue *>(&Queue), Properties, &Res);
75
+ const cl_command_queue CLQueue = Queue->CLQueue ;
76
+ auto CLCommandBuffer =
77
+ clCreateCommandBufferKHR (1 , &CLQueue, Properties, &Res);
72
78
CL_RETURN_ON_FAILURE_AND_SET_NULL (Res, phCommandBuffer);
73
79
74
80
try {
75
81
auto URCommandBuffer = std::make_unique<ur_exp_command_buffer_handle_t_>(
76
82
Queue, hContext, hDevice, CLCommandBuffer, IsUpdatable, IsInOrder);
77
83
*phCommandBuffer = URCommandBuffer.release ();
78
- } catch (... ) {
84
+ } catch (std::bad_alloc & ) {
79
85
return UR_RESULT_ERROR_OUT_OF_RESOURCES;
86
+ } catch (...) {
87
+ return UR_RESULT_ERROR_UNKNOWN;
80
88
}
81
89
82
90
CL_RETURN_ON_FAILURE (Res);
@@ -101,7 +109,7 @@ urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) {
101
109
UR_APIEXPORT ur_result_t UR_APICALL
102
110
urCommandBufferFinalizeExp (ur_exp_command_buffer_handle_t hCommandBuffer) {
103
111
UR_ASSERT (!hCommandBuffer->IsFinalized , UR_RESULT_ERROR_INVALID_OPERATION);
104
- cl_context CLContext = cl_adapter::cast<cl_context>( hCommandBuffer->hContext ) ;
112
+ cl_context CLContext = hCommandBuffer->hContext -> CLContext ;
105
113
cl_ext::clFinalizeCommandBufferKHR_fn clFinalizeCommandBufferKHR = nullptr ;
106
114
UR_RETURN_ON_FAILURE (
107
115
cl_ext::getExtFuncFromContext<decltype (clFinalizeCommandBufferKHR)>(
@@ -133,7 +141,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
133
141
UR_ASSERT (!(phCommandHandle && !hCommandBuffer->IsUpdatable ),
134
142
UR_RESULT_ERROR_INVALID_OPERATION);
135
143
136
- cl_context CLContext = cl_adapter::cast<cl_context>( hCommandBuffer->hContext ) ;
144
+ cl_context CLContext = hCommandBuffer->hContext -> CLContext ;
137
145
cl_ext::clCommandNDRangeKernelKHR_fn clCommandNDRangeKernelKHR = nullptr ;
138
146
UR_RETURN_ON_FAILURE (
139
147
cl_ext::getExtFuncFromContext<decltype (clCommandNDRangeKernelKHR)>(
@@ -161,10 +169,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
161
169
IsInOrder ? nullptr : pSyncPointWaitList;
162
170
uint32_t WaitListSize = IsInOrder ? 0 : numSyncPointsInWaitList;
163
171
CL_RETURN_ON_FAILURE (clCommandNDRangeKernelKHR (
164
- hCommandBuffer->CLCommandBuffer , nullptr , Properties,
165
- cl_adapter::cast<cl_kernel>(hKernel), workDim, pGlobalWorkOffset,
166
- pGlobalWorkSize, pLocalWorkSize, WaitListSize, SyncPointWaitList,
167
- RetSyncPoint, OutCommandHandle));
172
+ hCommandBuffer->CLCommandBuffer , nullptr , Properties, hKernel->CLKernel ,
173
+ workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, WaitListSize,
174
+ SyncPointWaitList, RetSyncPoint, OutCommandHandle));
168
175
169
176
try {
170
177
auto Handle = std::make_unique<ur_exp_command_buffer_command_handle_t_>(
@@ -224,7 +231,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp(
224
231
(void )phEventWaitList;
225
232
(void )phEvent;
226
233
(void )phCommand;
227
- cl_context CLContext = cl_adapter::cast<cl_context>( hCommandBuffer->hContext ) ;
234
+ cl_context CLContext = hCommandBuffer->hContext -> CLContext ;
228
235
cl_ext::clCommandCopyBufferKHR_fn clCommandCopyBufferKHR = nullptr ;
229
236
UR_RETURN_ON_FAILURE (
230
237
cl_ext::getExtFuncFromContext<decltype (clCommandCopyBufferKHR)>(
@@ -237,10 +244,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp(
237
244
IsInOrder ? nullptr : pSyncPointWaitList;
238
245
uint32_t WaitListSize = IsInOrder ? 0 : numSyncPointsInWaitList;
239
246
CL_RETURN_ON_FAILURE (clCommandCopyBufferKHR (
240
- hCommandBuffer->CLCommandBuffer , nullptr , nullptr ,
241
- cl_adapter::cast<cl_mem>(hSrcMem), cl_adapter::cast<cl_mem>(hDstMem),
242
- srcOffset, dstOffset, size, WaitListSize, SyncPointWaitList, RetSyncPoint,
243
- nullptr ));
247
+ hCommandBuffer->CLCommandBuffer , nullptr , nullptr , hSrcMem->CLMemory ,
248
+ hDstMem->CLMemory , srcOffset, dstOffset, size, WaitListSize,
249
+ SyncPointWaitList, RetSyncPoint, nullptr ));
244
250
245
251
return UR_RESULT_SUCCESS;
246
252
}
@@ -267,7 +273,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp(
267
273
size_t OpenCLDstRect[3 ]{dstOrigin.x , dstOrigin.y , dstOrigin.z };
268
274
size_t OpenCLRegion[3 ]{region.width , region.height , region.depth };
269
275
270
- cl_context CLContext = cl_adapter::cast<cl_context>( hCommandBuffer->hContext ) ;
276
+ cl_context CLContext = hCommandBuffer->hContext -> CLContext ;
271
277
cl_ext::clCommandCopyBufferRectKHR_fn clCommandCopyBufferRectKHR = nullptr ;
272
278
UR_RETURN_ON_FAILURE (
273
279
cl_ext::getExtFuncFromContext<decltype (clCommandCopyBufferRectKHR)>(
@@ -280,11 +286,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp(
280
286
IsInOrder ? nullptr : pSyncPointWaitList;
281
287
uint32_t WaitListSize = IsInOrder ? 0 : numSyncPointsInWaitList;
282
288
CL_RETURN_ON_FAILURE (clCommandCopyBufferRectKHR (
283
- hCommandBuffer->CLCommandBuffer , nullptr , nullptr ,
284
- cl_adapter::cast<cl_mem>(hSrcMem), cl_adapter::cast<cl_mem>(hDstMem),
285
- OpenCLOriginRect, OpenCLDstRect, OpenCLRegion, srcRowPitch, srcSlicePitch,
286
- dstRowPitch, dstSlicePitch, WaitListSize, SyncPointWaitList, RetSyncPoint,
287
- nullptr ));
289
+ hCommandBuffer->CLCommandBuffer , nullptr , nullptr , hSrcMem->CLMemory ,
290
+ hDstMem->CLMemory , OpenCLOriginRect, OpenCLDstRect, OpenCLRegion,
291
+ srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, WaitListSize,
292
+ SyncPointWaitList, RetSyncPoint, nullptr ));
288
293
289
294
return UR_RESULT_SUCCESS;
290
295
}
@@ -376,7 +381,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp(
376
381
[[maybe_unused]] ur_event_handle_t *phEvent,
377
382
[[maybe_unused]] ur_exp_command_buffer_command_handle_t *phCommand) {
378
383
379
- cl_context CLContext = cl_adapter::cast<cl_context>( hCommandBuffer->hContext ) ;
384
+ cl_context CLContext = hCommandBuffer->hContext -> CLContext ;
380
385
cl_ext::clCommandFillBufferKHR_fn clCommandFillBufferKHR = nullptr ;
381
386
UR_RETURN_ON_FAILURE (
382
387
cl_ext::getExtFuncFromContext<decltype (clCommandFillBufferKHR)>(
@@ -389,9 +394,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp(
389
394
IsInOrder ? nullptr : pSyncPointWaitList;
390
395
uint32_t WaitListSize = IsInOrder ? 0 : numSyncPointsInWaitList;
391
396
CL_RETURN_ON_FAILURE (clCommandFillBufferKHR (
392
- hCommandBuffer->CLCommandBuffer , nullptr , nullptr ,
393
- cl_adapter::cast<cl_mem>(hBuffer), pPattern, patternSize, offset, size,
394
- WaitListSize, SyncPointWaitList, RetSyncPoint, nullptr ));
397
+ hCommandBuffer->CLCommandBuffer , nullptr , nullptr , hBuffer-> CLMemory ,
398
+ pPattern, patternSize, offset, size, WaitListSize, SyncPointWaitList ,
399
+ RetSyncPoint, nullptr ));
395
400
396
401
return UR_RESULT_SUCCESS;
397
402
}
@@ -447,21 +452,25 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCommandBufferExp(
447
452
uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
448
453
ur_event_handle_t *phEvent) {
449
454
450
- cl_context CLContext = cl_adapter::cast<cl_context>( hCommandBuffer->hContext ) ;
455
+ cl_context CLContext = hCommandBuffer->hContext -> CLContext ;
451
456
cl_ext::clEnqueueCommandBufferKHR_fn clEnqueueCommandBufferKHR = nullptr ;
452
457
UR_RETURN_ON_FAILURE (
453
458
cl_ext::getExtFuncFromContext<decltype (clEnqueueCommandBufferKHR)>(
454
459
CLContext, cl_ext::ExtFuncPtrCache->clEnqueueCommandBufferKHRCache ,
455
460
cl_ext::EnqueueCommandBufferName, &clEnqueueCommandBufferKHR));
456
461
457
462
const uint32_t NumberOfQueues = 1 ;
458
-
463
+ cl_event Event;
464
+ std::vector<cl_event> CLWaitEvents (numEventsInWaitList);
465
+ for (uint32_t i = 0 ; i < numEventsInWaitList; i++) {
466
+ CLWaitEvents[i] = phEventWaitList[i]->CLEvent ;
467
+ }
468
+ cl_command_queue CLQueue = hQueue->CLQueue ;
459
469
CL_RETURN_ON_FAILURE (clEnqueueCommandBufferKHR (
460
- NumberOfQueues, cl_adapter::cast<cl_command_queue *>(&hQueue),
461
- hCommandBuffer->CLCommandBuffer , numEventsInWaitList,
462
- cl_adapter::cast<const cl_event *>(phEventWaitList),
463
- cl_adapter::cast<cl_event *>(phEvent)));
470
+ NumberOfQueues, &CLQueue, hCommandBuffer->CLCommandBuffer ,
471
+ numEventsInWaitList, CLWaitEvents.data (), ifUrEvent (phEvent, Event)));
464
472
473
+ UR_RETURN_ON_FAILURE (createUREvent (Event, hQueue->Context , hQueue, phEvent));
465
474
return UR_RESULT_SUCCESS;
466
475
}
467
476
@@ -501,11 +510,11 @@ void updateKernelArgs(std::vector<cl_mutable_dispatch_arg_khr> &CLArgs,
501
510
for (uint32_t i = 0 ; i < NumMemobjArgs; i++) {
502
511
const ur_exp_command_buffer_update_memobj_arg_desc_t &URMemObjArg =
503
512
ArgMemobjList[i];
513
+ cl_mem arg_value = URMemObjArg.hNewMemObjArg ->CLMemory ;
504
514
cl_mutable_dispatch_arg_khr CLArg{
505
515
URMemObjArg.argIndex , // arg_index
506
516
sizeof (cl_mem), // arg_size
507
- cl_adapter::cast<const cl_mem *>(
508
- &URMemObjArg.hNewMemObjArg ) // arg_value
517
+ &arg_value // arg_value
509
518
};
510
519
511
520
CLArgs.push_back (CLArg);
@@ -549,7 +558,7 @@ ur_result_t validateCommandDesc(
549
558
// Verify that the device supports updating the aspects of the kernel that
550
559
// the user is requesting.
551
560
ur_device_handle_t URDevice = CommandBuffer->hDevice ;
552
- cl_device_id CLDevice = cl_adapter::cast<cl_device_id>( URDevice) ;
561
+ cl_device_id CLDevice = URDevice-> CLDevice ;
553
562
554
563
ur_device_command_buffer_update_capability_flags_t UpdateCapabilities = 0 ;
555
564
CL_RETURN_ON_FAILURE (
@@ -601,7 +610,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
601
610
validateCommandDesc (hCommandBuffer, pUpdateKernelLaunch[i]));
602
611
}
603
612
604
- cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext );
613
+ cl_context CLContext = hCommandBuffer->hContext ->CLContext ;
614
+
605
615
cl_ext::clUpdateMutableCommandsKHR_fn clUpdateMutableCommandsKHR = nullptr ;
606
616
UR_RETURN_ON_FAILURE (
607
617
cl_ext::getExtFuncFromContext<decltype (clUpdateMutableCommandsKHR)>(
@@ -657,8 +667,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
657
667
updateNDRange (CLLocalWorkSize, CommandWorkDim, LocalWorkSizePtr);
658
668
}
659
669
660
- cl_mutable_command_khr CLCommand =
661
- cl_adapter::cast<cl_mutable_command_khr>(Command->CLMutableCommand );
670
+ cl_mutable_command_khr CLCommand = Command->CLMutableCommand ;
662
671
Config = cl_mutable_dispatch_config_khr{
663
672
CLCommand,
664
673
static_cast <cl_uint>(CLArgs.size ()), // num_args
@@ -736,7 +745,7 @@ ur_result_t UR_APICALL urCommandBufferAppendNativeCommandExp(
736
745
uint32_t numSyncPointsInWaitList,
737
746
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
738
747
ur_exp_command_buffer_sync_point_t *pSyncPoint) {
739
- cl_context CLContext = cl_adapter::cast<cl_context>( hCommandBuffer->hContext ) ;
748
+ cl_context CLContext = hCommandBuffer->hContext -> CLContext ;
740
749
cl_ext::clCommandBarrierWithWaitListKHR_fn clCommandBarrierWithWaitListKHR =
741
750
nullptr ;
742
751
UR_RETURN_ON_FAILURE (
0 commit comments