Skip to content

Commit 41402c5

Browse files
authored
[SYCL] Reset command lists in the synchronization points (#6168)
Don't reset command lists each time we try to get an available command list. We should check for signaled command lists in synchronization points like piQueueFinish, piQueueRelease, piEventsWait instead.
1 parent fff033c commit 41402c5

File tree

2 files changed

+87
-72
lines changed

2 files changed

+87
-72
lines changed

sycl/plugins/level_zero/pi_level_zero.cpp

Lines changed: 83 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -1134,6 +1134,31 @@ _pi_queue::_pi_queue(std::vector<ze_command_queue_handle_t> &ComputeQueues,
11341134
CopyCommandBatch.QueueBatchSize = ZeCommandListBatchCopyConfig.startSize();
11351135
}
11361136

1137+
// Reset signalled command lists in the queue and put them to the cache of
1138+
// command lists. A caller must not lock the queue mutex.
1139+
pi_result _pi_queue::resetCommandLists() {
1140+
// We check for command lists that have been already signalled, but have not
1141+
// been added to the available list yet. Each command list has a fence
1142+
// associated which tracks if a command list has completed dispatch of its
1143+
// commands and is ready for reuse. If a command list is found to have been
1144+
// signalled, then the command list & fence are reset and command list is
1145+
// returned to the command list cache. All events associated with command list
1146+
// are cleaned up if command list was reset.
1147+
std::scoped_lock Lock(this->Mutex);
1148+
for (auto &&it = CommandListMap.begin(); it != CommandListMap.end(); ++it) {
1149+
// It is possible that the fence was already noted as signalled and
1150+
// reset. In that case the InUse flag will be false.
1151+
if (it->second.InUse) {
1152+
ze_result_t ZeResult =
1153+
ZE_CALL_NOCHECK(zeFenceQueryStatus, (it->second.ZeFence));
1154+
if (ZeResult == ZE_RESULT_SUCCESS) {
1155+
PI_CALL(resetCommandList(it, true));
1156+
}
1157+
}
1158+
}
1159+
return PI_SUCCESS;
1160+
}
1161+
11371162
// Retrieve an available command list to be used in a PI call.
11381163
pi_result
11391164
_pi_context::getAvailableCommandList(pi_queue Queue,
@@ -1213,28 +1238,6 @@ _pi_context::getAvailableCommandList(pi_queue Queue,
12131238
}
12141239
}
12151240

1216-
// If there are no available command lists in the cache, then we check for
1217-
// command lists that have already signalled, but have not been added to the
1218-
// available list yet. Each command list has a fence associated which tracks
1219-
// if a command list has completed dispatch of its commands and is ready for
1220-
// reuse. If a command list is found to have been signalled, then the
1221-
// command list & fence are reset and we return.
1222-
for (auto it = Queue->CommandListMap.begin();
1223-
it != Queue->CommandListMap.end(); ++it) {
1224-
// Make sure this is the command list type needed.
1225-
if (UseCopyEngine != it->second.isCopy(Queue))
1226-
continue;
1227-
1228-
ze_result_t ZeResult =
1229-
ZE_CALL_NOCHECK(zeFenceQueryStatus, (it->second.ZeFence));
1230-
if (ZeResult == ZE_RESULT_SUCCESS) {
1231-
Queue->resetCommandList(it, false);
1232-
CommandList = it;
1233-
CommandList->second.InUse = true;
1234-
return PI_SUCCESS;
1235-
}
1236-
}
1237-
12381241
// If there are no available command lists nor signalled command lists, then
12391242
// we must create another command list if we have not exceed the maximum
12401243
// command lists we can create.
@@ -3392,51 +3395,54 @@ pi_result piQueueFinish(pi_queue Queue) {
33923395

33933396
Queue->synchronize();
33943397
return PI_SUCCESS;
3395-
}
3396-
3397-
std::unique_lock Lock(Queue->Mutex);
3398-
std::vector<ze_command_queue_handle_t> ZeQueues;
3398+
} else {
3399+
std::unique_lock Lock(Queue->Mutex);
3400+
std::vector<ze_command_queue_handle_t> ZeQueues;
33993401

3400-
// execute any command list that may still be open.
3401-
if (auto Res = Queue->executeAllOpenCommandLists())
3402-
return Res;
3402+
// execute any command list that may still be open.
3403+
if (auto Res = Queue->executeAllOpenCommandLists())
3404+
return Res;
34033405

3404-
// Make a copy of queues to sync and release the lock.
3405-
ZeQueues = Queue->CopyQueueGroup.ZeQueues;
3406-
std::copy(Queue->ComputeQueueGroup.ZeQueues.begin(),
3407-
Queue->ComputeQueueGroup.ZeQueues.end(),
3408-
std::back_inserter(ZeQueues));
3406+
// Make a copy of queues to sync and release the lock.
3407+
ZeQueues = Queue->CopyQueueGroup.ZeQueues;
3408+
std::copy(Queue->ComputeQueueGroup.ZeQueues.begin(),
3409+
Queue->ComputeQueueGroup.ZeQueues.end(),
3410+
std::back_inserter(ZeQueues));
34093411

3410-
// Remember the last command's event.
3411-
auto LastCommandEvent = Queue->LastCommandEvent;
3412+
// Remember the last command's event.
3413+
auto LastCommandEvent = Queue->LastCommandEvent;
34123414

3413-
// Don't hold a lock to the queue's mutex while waiting.
3414-
// This allows continue working with the queue from other threads.
3415-
// TODO: this currently exhibits some issues in the driver, so
3416-
// we control this with an env var. Remove this control when
3417-
// we settle one way or the other.
3418-
static bool HoldLock =
3419-
std::getenv("SYCL_PI_LEVEL_ZERO_QUEUE_FINISH_HOLD_LOCK") != nullptr;
3420-
if (!HoldLock) {
3421-
Lock.unlock();
3422-
}
3415+
// Don't hold a lock to the queue's mutex while waiting.
3416+
// This allows continue working with the queue from other threads.
3417+
// TODO: this currently exhibits some issues in the driver, so
3418+
// we control this with an env var. Remove this control when
3419+
// we settle one way or the other.
3420+
static bool HoldLock =
3421+
std::getenv("SYCL_PI_LEVEL_ZERO_QUEUE_FINISH_HOLD_LOCK") != nullptr;
3422+
if (!HoldLock) {
3423+
Lock.unlock();
3424+
}
34233425

3424-
for (auto ZeQueue : ZeQueues) {
3425-
if (ZeQueue)
3426-
ZE_CALL(zeHostSynchronize, (ZeQueue));
3427-
}
3426+
for (auto ZeQueue : ZeQueues) {
3427+
if (ZeQueue)
3428+
ZE_CALL(zeHostSynchronize, (ZeQueue));
3429+
}
34283430

3429-
// Prevent unneeded already finished events to show up in the wait list.
3430-
// We can only do so if nothing else was submitted to the queue
3431-
// while we were synchronizing it.
3432-
if (!HoldLock) {
3433-
std::scoped_lock Lock(Queue->Mutex);
3434-
if (LastCommandEvent == Queue->LastCommandEvent) {
3431+
// Prevent unneeded already finished events to show up in the wait list.
3432+
// We can only do so if nothing else was submitted to the queue
3433+
// while we were synchronizing it.
3434+
if (!HoldLock) {
3435+
std::scoped_lock Lock(Queue->Mutex);
3436+
if (LastCommandEvent == Queue->LastCommandEvent) {
3437+
Queue->LastCommandEvent = nullptr;
3438+
}
3439+
} else {
34353440
Queue->LastCommandEvent = nullptr;
34363441
}
3437-
} else {
3438-
Queue->LastCommandEvent = nullptr;
34393442
}
3443+
// Reset signalled command lists and return them back to the cache of
3444+
// available command lists.
3445+
Queue->resetCommandLists();
34403446
return PI_SUCCESS;
34413447
}
34423448

@@ -5865,25 +5871,30 @@ pi_result piEnqueueEventsWait(pi_queue Queue, pi_uint32 NumEventsInWaitList,
58655871
return Queue->executeCommandList(CommandList);
58665872
}
58675873

5868-
// If wait-list is empty, then this particular command should wait until
5869-
// all previous enqueued commands to the command-queue have completed.
5870-
//
5871-
// TODO: find a way to do that without blocking the host.
5874+
{
5875+
// If wait-list is empty, then this particular command should wait until
5876+
// all previous enqueued commands to the command-queue have completed.
5877+
//
5878+
// TODO: find a way to do that without blocking the host.
58725879

5873-
// Lock automatically releases when this goes out of scope.
5874-
std::scoped_lock lock(Queue->Mutex);
5880+
// Lock automatically releases when this goes out of scope.
5881+
std::scoped_lock lock(Queue->Mutex);
58755882

5876-
auto Res = createEventAndAssociateQueue(Queue, Event, PI_COMMAND_TYPE_USER,
5877-
Queue->CommandListMap.end());
5878-
if (Res != PI_SUCCESS)
5879-
return Res;
5883+
auto Res = createEventAndAssociateQueue(Queue, Event, PI_COMMAND_TYPE_USER,
5884+
Queue->CommandListMap.end());
5885+
if (Res != PI_SUCCESS)
5886+
return Res;
5887+
5888+
Queue->synchronize();
58805889

5881-
Queue->synchronize();
5890+
Queue->LastCommandEvent = *Event;
5891+
5892+
ZE_CALL(zeEventHostSignal, ((*Event)->ZeEvent));
5893+
(*Event)->Completed = true;
5894+
}
58825895

5883-
Queue->LastCommandEvent = *Event;
5896+
Queue->resetCommandLists();
58845897

5885-
ZE_CALL(zeEventHostSignal, ((*Event)->ZeEvent));
5886-
(*Event)->Completed = true;
58875898
return PI_SUCCESS;
58885899
}
58895900

sycl/plugins/level_zero/pi_level_zero.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -891,6 +891,10 @@ struct _pi_queue : _pi_object {
891891
pi_result resetCommandList(pi_command_list_ptr_t CommandList,
892892
bool MakeAvailable);
893893

894+
// Reset signalled command lists in the queue and put them to the cache of
895+
// command lists. A caller must not lock the queue mutex.
896+
pi_result resetCommandLists();
897+
894898
// Returns true if an OpenCommandList has commands that need to be submitted.
895899
// If IsCopy is 'true', then the OpenCommandList containing copy commands is
896900
// checked. Otherwise, the OpenCommandList containing compute commands is

0 commit comments

Comments
 (0)