@@ -406,6 +406,40 @@ ze_result_t ZeCall::doCall(ze_result_t ZeResult, const char *CallStr,
406
406
return mapError(Result);
407
407
#define ZE_CALL_NOCHECK (Call ) ZeCall().doCall(Call, #Call, false )
408
408
409
+ // This helper function increments the reference counter of the Queue
410
+ // without guarding with a lock.
411
+ // It is the caller's responsibility to make sure the lock is acquired
412
+ // on the Queue that is passed in.
413
+ inline static void piQueueRetainNoLock (pi_queue Queue) { Queue->RefCount ++; }
414
+
415
+ // This helper function creates a pi_event and associates it with a pi_queue.
416
+ // Note that the caller of this function must have acquired the lock on the Queue
417
+ // that is passed in.
418
+ // \param Queue pi_queue to associate with a new event.
419
+ // \param Event a pointer to hold the newly created pi_event
420
+ // \param CommandType the command type of the new event, as determined by the caller
421
+ // \param ZeCommandList the handle to associate with the newly created event
422
+ inline static pi_result
423
+ createEventAndAssociateQueue (pi_queue Queue, pi_event *Event,
424
+ pi_command_type CommandType,
425
+ ze_command_list_handle_t ZeCommandList) {
426
+ pi_result Res = piEventCreate (Queue->Context , Event);
427
+ if (Res != PI_SUCCESS)
428
+ return Res;
429
+
430
+ (*Event)->Queue = Queue;
431
+ (*Event)->CommandType = CommandType;
432
+ (*Event)->ZeCommandList = ZeCommandList;
433
+
434
+ // We need to increment the reference counter here to avoid pi_queue
435
+ // being released before the associated pi_event is released because
436
+ // piEventRelease requires access to the associated pi_queue.
437
+ // In piEventRelease, the reference counter of the Queue is decremented
438
+ // to release it.
439
+ piQueueRetainNoLock (Queue);
440
+ return PI_SUCCESS;
441
+ }
442
+
409
443
pi_result _pi_device::initialize () {
410
444
uint32_t numQueueGroups = 0 ;
411
445
ZE_CALL (zeDeviceGetCommandQueueGroupProperties (ZeDevice, &numQueueGroups,
@@ -1988,34 +2022,44 @@ pi_result piQueueRetain(pi_queue Queue) {
1988
2022
// Lock automatically releases when this goes out of scope.
1989
2023
std::lock_guard<std::mutex> lock (Queue->PiQueueMutex );
1990
2024
1991
- ++ (Queue-> RefCount );
2025
+ piQueueRetainNoLock (Queue);
1992
2026
return PI_SUCCESS;
1993
2027
}
1994
2028
1995
2029
pi_result piQueueRelease (pi_queue Queue) {
1996
2030
PI_ASSERT (Queue, PI_INVALID_QUEUE);
2031
+ // We need to use a bool variable here to check the condition that
2032
+ // RefCount becomes zero atomically with PiQueueMutex lock.
2033
+ // Then, we can release the lock before we remove the Queue below.
2034
+ bool RefCountZero = false ;
2035
+ {
2036
+ std::lock_guard<std::mutex> Lock (Queue->PiQueueMutex );
2037
+ Queue->RefCount --;
2038
+ if (Queue->RefCount == 0 )
2039
+ RefCountZero = true ;
2040
+
2041
+ if (RefCountZero) {
2042
+ // It is possible to get to here and still have an open command list
2043
+ // if no wait or finish ever occurred for this queue. But still need
2044
+ // to make sure commands get executed.
2045
+ if (auto Res = Queue->executeOpenCommandList ())
2046
+ return Res;
1997
2047
1998
- // Lock automatically releases when this goes out of scope.
1999
- std::lock_guard<std::mutex> lock (Queue->PiQueueMutex );
2000
-
2001
- if (--(Queue->RefCount ) == 0 ) {
2002
- // It is possible to get to here and still have an open command list
2003
- // if no wait or finish ever occurred for this queue. But still need
2004
- // to make sure commands get executed.
2005
- if (auto Res = Queue->executeOpenCommandList ())
2006
- return Res;
2048
+ // Destroy all the fences that were created for this queue.
2049
+ for (const auto &MapEntry : Queue->ZeCommandListFenceMap ) {
2050
+ ZE_CALL (zeFenceDestroy (MapEntry.second ));
2051
+ }
2052
+ Queue->ZeCommandListFenceMap .clear ();
2053
+ ZE_CALL (zeCommandQueueDestroy (Queue->ZeCommandQueue ));
2054
+ Queue->ZeCommandQueue = nullptr ;
2007
2055
2008
- // Destroy all the fences created associated with this queue.
2009
- for (const auto &MapEntry : Queue->ZeCommandListFenceMap ) {
2010
- ZE_CALL (zeFenceDestroy (MapEntry.second ));
2056
+ zePrint (" piQueueRelease NumTimesClosedFull %d, NumTimesClosedEarly %d\n " ,
2057
+ Queue->NumTimesClosedFull , Queue->NumTimesClosedEarly );
2011
2058
}
2012
- Queue->ZeCommandListFenceMap .clear ();
2013
- ZE_CALL (zeCommandQueueDestroy (Queue->ZeCommandQueue ));
2014
- Queue->ZeCommandQueue = nullptr ;
2015
-
2016
- zePrint (" piQueueRelease NumTimesClosedFull %d, NumTimesClosedEarly %d\n " ,
2017
- Queue->NumTimesClosedFull , Queue->NumTimesClosedEarly );
2018
2059
}
2060
+
2061
+ if (RefCountZero)
2062
+ delete Queue;
2019
2063
return PI_SUCCESS;
2020
2064
}
2021
2065
@@ -3411,13 +3455,11 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
3411
3455
return Res;
3412
3456
3413
3457
ze_event_handle_t ZeEvent = nullptr ;
3414
- auto Res = piEventCreate (Kernel->Program ->Context , Event);
3458
+ pi_result Res = createEventAndAssociateQueue (
3459
+ Queue, Event, PI_COMMAND_TYPE_NDRANGE_KERNEL, ZeCommandList);
3415
3460
if (Res != PI_SUCCESS)
3416
3461
return Res;
3417
-
3418
- (*Event)->Queue = Queue;
3419
- (*Event)->CommandType = PI_COMMAND_TYPE_NDRANGE_KERNEL;
3420
- (*Event)->ZeCommandList = ZeCommandList;
3462
+ ZeEvent = (*Event)->ZeEvent ;
3421
3463
3422
3464
// Save the kernel in the event, so that when the event is signalled
3423
3465
// the code can do a piKernelRelease on this kernel.
@@ -3430,8 +3472,6 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
3430
3472
// in CommandData.
3431
3473
piKernelRetain (Kernel);
3432
3474
3433
- ZeEvent = (*Event)->ZeEvent ;
3434
-
3435
3475
ze_event_handle_t *ZeEventWaitList =
3436
3476
_pi_event::createZeEventList (NumEventsInWaitList, EventWaitList);
3437
3477
if (!ZeEventWaitList)
@@ -3696,6 +3736,11 @@ pi_result piEventRelease(pi_event Event) {
3696
3736
auto Context = Event->Context ;
3697
3737
ZE_CALL (Context->decrementAliveEventsInPool (Event->ZeEventPool ));
3698
3738
3739
+ // We intentionally incremented the reference counter when an event is
3740
+ // created so that the pi_queue is not released before the associated
3741
+ // pi_event is released. Here we have to decrement it so pi_queue
3742
+ // can be released successfully.
3743
+ piQueueRelease (Event->Queue );
3699
3744
delete Event;
3700
3745
}
3701
3746
return PI_SUCCESS;
@@ -3885,14 +3930,10 @@ pi_result piEnqueueEventsWaitWithBarrier(pi_queue Queue,
3885
3930
3886
3931
ze_event_handle_t ZeEvent = nullptr ;
3887
3932
if (Event) {
3888
- auto Res = piEventCreate (Queue->Context , Event);
3933
+ auto Res = createEventAndAssociateQueue (Queue, Event, PI_COMMAND_TYPE_USER,
3934
+ ZeCommandList);
3889
3935
if (Res != PI_SUCCESS)
3890
3936
return Res;
3891
-
3892
- (*Event)->Queue = Queue;
3893
- (*Event)->CommandType = PI_COMMAND_TYPE_USER;
3894
- (*Event)->ZeCommandList = ZeCommandList;
3895
-
3896
3937
ZeEvent = (*Event)->ZeEvent ;
3897
3938
}
3898
3939
@@ -3959,7 +4000,7 @@ enqueueMemCopyHelper(pi_command_type CommandType, pi_queue Queue, void *Dst,
3959
4000
pi_bool BlockingWrite, size_t Size, const void *Src,
3960
4001
pi_uint32 NumEventsInWaitList,
3961
4002
const pi_event *EventWaitList, pi_event *Event) {
3962
-
4003
+ PI_ASSERT (Queue, PI_INVALID_QUEUE);
3963
4004
// Get a new command list to be used on this call
3964
4005
ze_command_list_handle_t ZeCommandList = nullptr ;
3965
4006
ze_fence_handle_t ZeFence = nullptr ;
@@ -3969,14 +4010,10 @@ enqueueMemCopyHelper(pi_command_type CommandType, pi_queue Queue, void *Dst,
3969
4010
3970
4011
ze_event_handle_t ZeEvent = nullptr ;
3971
4012
if (Event) {
3972
- auto Res = piEventCreate (Queue->Context , Event);
4013
+ auto Res =
4014
+ createEventAndAssociateQueue (Queue, Event, CommandType, ZeCommandList);
3973
4015
if (Res != PI_SUCCESS)
3974
4016
return Res;
3975
-
3976
- (*Event)->Queue = Queue;
3977
- (*Event)->CommandType = CommandType;
3978
- (*Event)->ZeCommandList = ZeCommandList;
3979
-
3980
4017
ZeEvent = (*Event)->ZeEvent ;
3981
4018
}
3982
4019
@@ -4019,7 +4056,7 @@ static pi_result enqueueMemCopyRectHelper(
4019
4056
size_t DstSlicePitch, pi_bool Blocking, pi_uint32 NumEventsInWaitList,
4020
4057
const pi_event *EventWaitList, pi_event *Event) {
4021
4058
4022
- PI_ASSERT (Region && SrcOrigin && DstOrigin, PI_INVALID_VALUE);
4059
+ PI_ASSERT (Region && SrcOrigin && DstOrigin && Queue , PI_INVALID_VALUE);
4023
4060
4024
4061
// Get a new command list to be used on this call
4025
4062
ze_command_list_handle_t ZeCommandList = nullptr ;
@@ -4030,14 +4067,10 @@ static pi_result enqueueMemCopyRectHelper(
4030
4067
4031
4068
ze_event_handle_t ZeEvent = nullptr ;
4032
4069
if (Event) {
4033
- auto Res = piEventCreate (Queue->Context , Event);
4070
+ auto Res =
4071
+ createEventAndAssociateQueue (Queue, Event, CommandType, ZeCommandList);
4034
4072
if (Res != PI_SUCCESS)
4035
4073
return Res;
4036
-
4037
- (*Event)->Queue = Queue;
4038
- (*Event)->CommandType = CommandType;
4039
- (*Event)->ZeCommandList = ZeCommandList;
4040
-
4041
4074
ZeEvent = (*Event)->ZeEvent ;
4042
4075
}
4043
4076
@@ -4202,7 +4235,7 @@ enqueueMemFillHelper(pi_command_type CommandType, pi_queue Queue, void *Ptr,
4202
4235
const void *Pattern, size_t PatternSize, size_t Size,
4203
4236
pi_uint32 NumEventsInWaitList,
4204
4237
const pi_event *EventWaitList, pi_event *Event) {
4205
-
4238
+ PI_ASSERT (Queue, PI_INVALID_QUEUE);
4206
4239
// Get a new command list to be used on this call
4207
4240
ze_command_list_handle_t ZeCommandList = nullptr ;
4208
4241
ze_fence_handle_t ZeFence = nullptr ;
@@ -4212,14 +4245,10 @@ enqueueMemFillHelper(pi_command_type CommandType, pi_queue Queue, void *Ptr,
4212
4245
4213
4246
ze_event_handle_t ZeEvent = nullptr ;
4214
4247
if (Event) {
4215
- auto Res = piEventCreate (Queue->Context , Event);
4248
+ auto Res =
4249
+ createEventAndAssociateQueue (Queue, Event, CommandType, ZeCommandList);
4216
4250
if (Res != PI_SUCCESS)
4217
4251
return Res;
4218
-
4219
- (*Event)->Queue = Queue;
4220
- (*Event)->CommandType = CommandType;
4221
- (*Event)->ZeCommandList = ZeCommandList;
4222
-
4223
4252
ZeEvent = (*Event)->ZeEvent ;
4224
4253
}
4225
4254
@@ -4297,14 +4326,13 @@ pi_result piEnqueueMemBufferMap(pi_queue Queue, pi_mem Buffer,
4297
4326
ze_event_handle_t ZeEvent = nullptr ;
4298
4327
4299
4328
if (Event) {
4300
- auto Res = piEventCreate (Queue->Context , Event);
4329
+ // Lock automatically releases when this goes out of scope.
4330
+ std::lock_guard<std::mutex> lock (Queue->PiQueueMutex );
4331
+
4332
+ auto Res = createEventAndAssociateQueue (
4333
+ Queue, Event, PI_COMMAND_TYPE_MEM_BUFFER_MAP, ZeCommandList);
4301
4334
if (Res != PI_SUCCESS)
4302
4335
return Res;
4303
-
4304
- (*Event)->Queue = Queue;
4305
- (*Event)->CommandType = PI_COMMAND_TYPE_MEM_BUFFER_MAP;
4306
- (*Event)->ZeCommandList = ZeCommandList;
4307
-
4308
4336
ZeEvent = (*Event)->ZeEvent ;
4309
4337
}
4310
4338
@@ -4395,15 +4423,15 @@ pi_result piEnqueueMemUnmap(pi_queue Queue, pi_mem MemObj, void *MappedPtr,
4395
4423
PI_ASSERT (Event, PI_INVALID_EVENT);
4396
4424
4397
4425
ze_event_handle_t ZeEvent = nullptr ;
4426
+
4398
4427
if (Event) {
4399
- auto Res = piEventCreate (Queue->Context , Event);
4428
+ // Lock automatically releases when this goes out of scope.
4429
+ std::lock_guard<std::mutex> lock (Queue->PiQueueMutex );
4430
+
4431
+ auto Res = createEventAndAssociateQueue (
4432
+ Queue, Event, PI_COMMAND_TYPE_MEM_BUFFER_UNMAP, ZeCommandList);
4400
4433
if (Res != PI_SUCCESS)
4401
4434
return Res;
4402
-
4403
- (*Event)->Queue = Queue;
4404
- (*Event)->CommandType = PI_COMMAND_TYPE_MEM_BUFFER_UNMAP;
4405
- (*Event)->ZeCommandList = ZeCommandList;
4406
-
4407
4435
ZeEvent = (*Event)->ZeEvent ;
4408
4436
}
4409
4437
@@ -4537,7 +4565,7 @@ enqueueMemImageCommandHelper(pi_command_type CommandType, pi_queue Queue,
4537
4565
size_t RowPitch, size_t SlicePitch,
4538
4566
pi_uint32 NumEventsInWaitList,
4539
4567
const pi_event *EventWaitList, pi_event *Event) {
4540
-
4568
+ PI_ASSERT (Queue, PI_INVALID_QUEUE);
4541
4569
// Get a new command list to be used on this call
4542
4570
ze_command_list_handle_t ZeCommandList = nullptr ;
4543
4571
ze_fence_handle_t ZeFence = nullptr ;
@@ -4547,14 +4575,10 @@ enqueueMemImageCommandHelper(pi_command_type CommandType, pi_queue Queue,
4547
4575
4548
4576
ze_event_handle_t ZeEvent = nullptr ;
4549
4577
if (Event) {
4550
- auto Res = piEventCreate (Queue->Context , Event);
4578
+ auto Res =
4579
+ createEventAndAssociateQueue (Queue, Event, CommandType, ZeCommandList);
4551
4580
if (Res != PI_SUCCESS)
4552
4581
return Res;
4553
-
4554
- (*Event)->Queue = Queue;
4555
- (*Event)->CommandType = CommandType;
4556
- (*Event)->ZeCommandList = ZeCommandList;
4557
-
4558
4582
ZeEvent = (*Event)->ZeEvent ;
4559
4583
}
4560
4584
@@ -5154,14 +5178,10 @@ pi_result piextUSMEnqueuePrefetch(pi_queue Queue, const void *Ptr, size_t Size,
5154
5178
// TODO: do we need to create a unique command type for this?
5155
5179
ze_event_handle_t ZeEvent = nullptr ;
5156
5180
if (Event) {
5157
- auto Res = piEventCreate (Queue->Context , Event);
5181
+ auto Res = createEventAndAssociateQueue (Queue, Event, PI_COMMAND_TYPE_USER,
5182
+ ZeCommandList);
5158
5183
if (Res != PI_SUCCESS)
5159
5184
return Res;
5160
-
5161
- (*Event)->Queue = Queue;
5162
- (*Event)->CommandType = PI_COMMAND_TYPE_USER;
5163
- (*Event)->ZeCommandList = ZeCommandList;
5164
-
5165
5185
ZeEvent = (*Event)->ZeEvent ;
5166
5186
}
5167
5187
@@ -5214,14 +5234,10 @@ pi_result piextUSMEnqueueMemAdvise(pi_queue Queue, const void *Ptr,
5214
5234
// TODO: do we need to create a unique command type for this?
5215
5235
ze_event_handle_t ZeEvent = nullptr ;
5216
5236
if (Event) {
5217
- auto Res = piEventCreate (Queue->Context , Event);
5237
+ auto Res = createEventAndAssociateQueue (Queue, Event, PI_COMMAND_TYPE_USER,
5238
+ ZeCommandList);
5218
5239
if (Res != PI_SUCCESS)
5219
5240
return Res;
5220
-
5221
- (*Event)->Queue = Queue;
5222
- (*Event)->CommandType = PI_COMMAND_TYPE_USER;
5223
- (*Event)->ZeCommandList = ZeCommandList;
5224
-
5225
5241
ZeEvent = (*Event)->ZeEvent ;
5226
5242
}
5227
5243
0 commit comments