Skip to content

Commit a827836

Browse files
[SYCL] reset enqueue status so that buffer can be used after failure is addressed (#17871)
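CMPLRLLVM-65738 - If we run out of memory, an exception is thrown and can be caught by the user application. The user can then address the problem in several ways: free memory, .wait() on outstanding operations, etc. But even after the problem has been addressed, SYCL has a bug wherein the buffer remains unusable: because the EnqueueStatus isn't reset, the Command/Requirement cannot be re-enqueued. The fix is straightforward: when enqueueCommand is reattempted on a non-host-task command whose status is SyclEnqueueFailed, reset that status to SyclEnqueueReady before proceeding. We also improve the FailedDependency unit test so that the dependency actually fails during enqueue instead of being pre-marked as failed.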
1 parent 0cec59e commit a827836

File tree

3 files changed

+34
-7
lines changed

3 files changed

+34
-7
lines changed

sycl/source/detail/scheduler/graph_processor.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,13 @@ bool Scheduler::GraphProcessor::enqueueCommand(
8484
return false;
8585
}
8686

87+
// Reset enqueue status if reattempting
88+
89+
if (!Cmd->isHostTask() &&
90+
Cmd->MEnqueueStatus == EnqueueResultT::SyclEnqueueFailed) {
91+
Cmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueReady;
92+
}
93+
8794
// Recursively enqueue all the implicit + explicit backend level dependencies
8895
// first and exit immediately if any of the commands cannot be enqueued.
8996
for (const EventImplPtr &Event : Cmd->getPreparedDepsEvents()) {

sycl/unittests/scheduler/FailedCommands.cpp

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,14 @@ TEST_F(SchedulerTest, FailedDependency) {
2020
queue Queue(context(Plt), default_selector_v);
2121

2222
detail::Requirement MockReq = getMockRequirement();
23-
MockCommand MDep(detail::getSyclObjImpl(Queue));
23+
MockCommand MDepFail(
24+
false, detail::getSyclObjImpl(Queue)); // <-- will fail to enqueue
2425
MockCommand MUser(detail::getSyclObjImpl(Queue));
25-
MDep.addUser(&MUser);
26+
MDepFail.addUser(&MUser);
2627
std::vector<detail::Command *> ToCleanUp;
27-
(void)MUser.addDep(detail::DepDesc{&MDep, &MockReq, nullptr}, ToCleanUp);
28+
(void)MUser.addDep(detail::DepDesc{&MDepFail, &MockReq, nullptr}, ToCleanUp);
2829
MUser.MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueReady;
29-
MDep.MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueFailed;
30+
MDepFail.MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueReady;
3031

3132
MockScheduler MS;
3233
auto Lock = MS.acquireGraphReadLock();
@@ -35,13 +36,13 @@ TEST_F(SchedulerTest, FailedDependency) {
3536
MockScheduler::enqueueCommand(&MUser, Res, detail::NON_BLOCKING);
3637

3738
ASSERT_FALSE(Enqueued) << "Enqueue process must fail\n";
38-
ASSERT_EQ(Res.MCmd, &MDep) << "Wrong failed command\n";
39+
ASSERT_EQ(Res.MCmd, &MDepFail) << "Wrong failed command\n";
3940
ASSERT_EQ(Res.MResult, detail::EnqueueResultT::SyclEnqueueFailed)
4041
<< "Enqueue process must fail\n";
4142
ASSERT_EQ(MUser.MEnqueueStatus, detail::EnqueueResultT::SyclEnqueueReady)
4243
<< "MUser shouldn't be marked as failed\n";
43-
ASSERT_EQ(MDep.MEnqueueStatus, detail::EnqueueResultT::SyclEnqueueFailed)
44-
<< "MDep should be marked as failed\n";
44+
ASSERT_EQ(MDepFail.MEnqueueStatus, detail::EnqueueResultT::SyclEnqueueFailed)
45+
<< "MDepFail should be marked as failed\n";
4546
}
4647

4748
void RunWithFailedCommandsAndCheck(bool SyncExceptionExpected,

sycl/unittests/scheduler/SchedulerTestUtils.hpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,17 @@ class MockCommand : public sycl::detail::Command {
5353
EXPECT_CALL(*this, enqueue).Times(AnyNumber());
5454
}
5555

56+
// This Mock will fail to enqueue.
57+
MockCommand(
58+
bool, sycl::detail::QueueImplPtr Queue,
59+
sycl::detail::Command::CommandType Type = sycl::detail::Command::RUN_CG)
60+
: Command{Type, Queue}, MRequirement{std::move(getMockRequirement())} {
61+
using namespace testing;
62+
ON_CALL(*this, enqueue)
63+
.WillByDefault(Invoke(this, &MockCommand::enqueueFail));
64+
EXPECT_CALL(*this, enqueue).Times(AnyNumber());
65+
}
66+
5667
void printDot(std::ostream &) const override {}
5768
void emitInstrumentationData() override {}
5869

@@ -70,6 +81,14 @@ class MockCommand : public sycl::detail::Command {
7081
std::vector<sycl::detail::Command *> &ToCleanUp) {
7182
return sycl::detail::Command::enqueue(EnqueueResult, Blocking, ToCleanUp);
7283
}
84+
bool enqueueFail(sycl::detail::EnqueueResultT &EnqueueResult,
85+
sycl::detail::BlockingT Blocking,
86+
std::vector<sycl::detail::Command *> &ToCleanUp) {
87+
this->MEnqueueStatus = sycl::detail::EnqueueResultT::SyclEnqueueFailed;
88+
EnqueueResult = {sycl::detail::EnqueueResultT::SyclEnqueueFailed, this};
89+
ToCleanUp.push_back(this);
90+
return false;
91+
}
7392

7493
ur_result_t MRetVal = UR_RESULT_SUCCESS;
7594

0 commit comments

Comments
 (0)