Skip to content

Commit 50931b1

Browse files
feature: in-order CommandList host sync support
Related-To: NEO-7966
Signed-off-by: Dunajski, Bartosz <[email protected]>
1 parent ae7cff3 commit 50931b1

File tree

3 files changed, with 87 additions and 24 deletions.

level_zero/core/source/cmdlist/cmdlist_hw_immediate.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -177,7 +177,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
177177
using BaseClass::inOrderDependencyCounterAllocation;
178178

179179
void printKernelsPrintfOutput(bool hangDetected);
180-
ze_result_t synchronizeInOrderExecution() const;
180+
ze_result_t synchronizeInOrderExecution(uint64_t timeout) const;
181181

182182
MOCKABLE_VIRTUAL void checkAssert();
183183
std::atomic<bool> dependenciesPresent{false};

level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl

Lines changed: 42 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -690,22 +690,28 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchCooperati
690690

691691
template <GFXCORE_FAMILY gfxCoreFamily>
692692
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::hostSynchronize(uint64_t timeout) {
693+
auto syncTaskCount = this->csr->peekTaskCount();
694+
ze_result_t status = ZE_RESULT_SUCCESS;
693695

694-
if (this->isFlushTaskSubmissionEnabled && !this->isSyncModeQueue) {
696+
if (isInOrderExecutionEnabled()) {
697+
status = synchronizeInOrderExecution(timeout);
698+
} else if (this->isFlushTaskSubmissionEnabled && !this->isSyncModeQueue) {
695699
const int64_t timeoutInMicroSeconds = timeout / 1000;
696-
auto syncTaskCount = this->csr->peekTaskCount();
697700
const auto waitStatus = this->csr->waitForCompletionWithTimeout(NEO::WaitParams{false, false, timeoutInMicroSeconds},
698701
syncTaskCount);
699702
if (waitStatus == NEO::WaitStatus::GpuHang) {
700-
this->printKernelsPrintfOutput(true);
701-
this->checkAssert();
702-
return ZE_RESULT_ERROR_DEVICE_LOST;
703+
status = ZE_RESULT_ERROR_DEVICE_LOST;
703704
}
705+
}
706+
707+
if (status == ZE_RESULT_SUCCESS) {
704708
this->csr->getInternalAllocationStorage()->cleanAllocationList(syncTaskCount, NEO::AllocationUsage::TEMPORARY_ALLOCATION);
705-
this->printKernelsPrintfOutput(false);
706-
this->checkAssert();
707709
}
708-
return ZE_RESULT_SUCCESS;
710+
711+
this->printKernelsPrintfOutput(status == ZE_RESULT_ERROR_DEVICE_LOST);
712+
this->checkAssert();
713+
714+
return status;
709715
}
710716

711717
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -852,7 +858,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::performCpuMemcpy(cons
852858
}
853859

854860
if (isInOrderExecutionEnabled()) {
855-
auto status = synchronizeInOrderExecution();
861+
auto status = synchronizeInOrderExecution(std::numeric_limits<uint64_t>::max());
856862
if (status != ZE_RESULT_SUCCESS) {
857863
return status;
858864
}
@@ -1061,23 +1067,42 @@ bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isRelaxedOrderingDispatchAll
10611067
}
10621068

10631069
template <GFXCORE_FAMILY gfxCoreFamily>
1064-
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::synchronizeInOrderExecution() const {
1070+
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::synchronizeInOrderExecution(uint64_t timeout) const {
1071+
std::chrono::high_resolution_clock::time_point waitStartTime, lastHangCheckTime, now;
1072+
uint64_t timeDiff = 0;
1073+
1074+
ze_result_t status = ZE_RESULT_NOT_READY;
1075+
10651076
auto hostAddress = static_cast<uint32_t *>(this->inOrderDependencyCounterAllocation->getUnderlyingBuffer());
10661077
auto waitValue = this->inOrderDependencyCounter;
10671078

1068-
auto lastHangCheckTime = std::chrono::high_resolution_clock::now();
1079+
lastHangCheckTime = std::chrono::high_resolution_clock::now();
1080+
waitStartTime = lastHangCheckTime;
10691081

1070-
while (*hostAddress < waitValue) {
1082+
do {
10711083
this->csr->downloadAllocation(*this->inOrderDependencyCounterAllocation);
10721084

1073-
bool status = NEO::WaitUtils::waitFunctionWithPredicate<const uint32_t>(hostAddress, waitValue, std::greater_equal<uint32_t>());
1085+
if (NEO::WaitUtils::waitFunctionWithPredicate<const uint32_t>(hostAddress, waitValue, std::greater_equal<uint32_t>())) {
1086+
status = ZE_RESULT_SUCCESS;
1087+
break;
1088+
}
10741089

1075-
if (!status && this->csr->checkGpuHangDetected(std::chrono::high_resolution_clock::now(), lastHangCheckTime)) {
1076-
return ZE_RESULT_ERROR_DEVICE_LOST;
1090+
if (this->csr->checkGpuHangDetected(std::chrono::high_resolution_clock::now(), lastHangCheckTime)) {
1091+
status = ZE_RESULT_ERROR_DEVICE_LOST;
1092+
break;
10771093
}
1078-
}
10791094

1080-
return ZE_RESULT_SUCCESS;
1095+
if (timeout == std::numeric_limits<uint64_t>::max()) {
1096+
continue;
1097+
} else if (timeout == 0) {
1098+
break;
1099+
}
1100+
1101+
now = std::chrono::high_resolution_clock::now();
1102+
timeDiff = std::chrono::duration_cast<std::chrono::nanoseconds>(now - waitStartTime).count();
1103+
} while (timeDiff < timeout);
1104+
1105+
return status;
10811106
}
10821107

10831108
} // namespace L0

level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp

Lines changed: 44 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1127,19 +1127,57 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenCallingSyncThenHandleCompleti
11271127

11281128
const uint32_t failCounter = 3;
11291129
uint32_t callCounter = 0;
1130+
bool forceFail = false;
11301131

11311132
ultCsr->downloadAllocationImpl = [&](GraphicsAllocation &graphicsAllocation) {
11321133
callCounter++;
1133-
if (callCounter >= failCounter) {
1134-
*hostAddress = 1;
1134+
if (callCounter >= failCounter && !forceFail) {
1135+
(*hostAddress)++;
11351136
}
11361137
};
11371138

1138-
immCmdList->synchronizeInOrderExecution();
1139+
// single check - not ready
1140+
{
1141+
EXPECT_EQ(ZE_RESULT_NOT_READY, immCmdList->hostSynchronize(0));
11391142

1140-
EXPECT_EQ(3u, callCounter);
1141-
EXPECT_EQ(2u, ultCsr->checkGpuHangDetectedCalled);
1142-
EXPECT_EQ(1u, *hostAddress);
1143+
EXPECT_EQ(1u, callCounter);
1144+
EXPECT_EQ(1u, ultCsr->checkGpuHangDetectedCalled);
1145+
EXPECT_EQ(0u, *hostAddress);
1146+
}
1147+
1148+
// timeout - not ready
1149+
{
1150+
forceFail = true;
1151+
EXPECT_EQ(ZE_RESULT_NOT_READY, immCmdList->hostSynchronize(10));
1152+
1153+
EXPECT_TRUE(callCounter > 1);
1154+
EXPECT_TRUE(ultCsr->checkGpuHangDetectedCalled > 1);
1155+
EXPECT_EQ(0u, *hostAddress);
1156+
}
1157+
1158+
// gpu hang
1159+
{
1160+
ultCsr->forceReturnGpuHang = true;
1161+
1162+
EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, immCmdList->hostSynchronize(10));
1163+
1164+
EXPECT_TRUE(callCounter > 1);
1165+
EXPECT_TRUE(ultCsr->checkGpuHangDetectedCalled > 1);
1166+
EXPECT_EQ(0u, *hostAddress);
1167+
}
1168+
1169+
// success
1170+
{
1171+
ultCsr->checkGpuHangDetectedCalled = 0;
1172+
ultCsr->forceReturnGpuHang = false;
1173+
forceFail = false;
1174+
callCounter = 0;
1175+
EXPECT_EQ(ZE_RESULT_SUCCESS, immCmdList->hostSynchronize(std::numeric_limits<uint64_t>::max()));
1176+
1177+
EXPECT_EQ(failCounter, callCounter);
1178+
EXPECT_EQ(failCounter - 1, ultCsr->checkGpuHangDetectedCalled);
1179+
EXPECT_EQ(1u, *hostAddress);
1180+
}
11431181
}
11441182

11451183
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenDoingCpuCopyThenSynchronize, IsAtLeastXeHpCore) {

0 commit comments

Comments
 (0)