Skip to content

Commit fb306c8

Browse files
feature: Signal in-order dependency counter in kernel split path
Related-To: NEO-7966
Signed-off-by: Dunajski, Bartosz <[email protected]>
1 parent 71c40c3 commit fb306c8

File tree

2 files changed

+98
-0
lines changed

2 files changed

+98
-0
lines changed

level_zero/core/source/cmdlist/cmdlist_hw.inl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1357,6 +1357,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
13571357
appendEventForProfilingAllWalkers(signalEvent, false, singlePipeControlPacket);
13581358
addFlushRequiredCommand(dstAllocationStruct.needsFlush, signalEvent);
13591359

1360+
if (this->inOrderExecutionEnabled && launchParams.isKernelSplitOperation) {
1361+
if (!signalEvent) {
1362+
NEO::PipeControlArgs args;
1363+
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), args);
1364+
}
1365+
appendSignalInOrderDependencyCounter();
1366+
}
1367+
13601368
if (NEO::DebugManager.flags.EnableSWTags.get()) {
13611369
neoDevice->getRootDeviceEnvironment().tagsManager->insertTag<GfxFamily, NEO::SWTags::CallNameEndTag>(
13621370
*commandContainer.getCommandStream(),

level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1216,6 +1216,96 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenGpuHangDetectedInCpuCopyPathT
12161216
context->freeMem(deviceAlloc);
12171217
}
12181218

1219+
// In-order immediate command list, memory copy with no signal event.
// Expects a PIPE_CONTROL without post-sync write to be programmed, followed
// (not necessarily immediately) by an MI_STORE_DATA_IMM that writes 1 to the
// in-order dependency counter allocation.
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitWithoutEventThenAddBarrierAndSignalCounter, IsAtLeastXeHpCore) {
    using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    auto immCmdList = createImmCmdList<gfxCoreFamily>();

    auto cmdStream = immCmdList->getCmdContainer().getCommandStream();

    // Cache-line-aligned allocation shifted by one byte, so the copy works on an
    // unaligned pointer (per the test name, intended to hit the kernel split path).
    const size_t ptrBaseSize = 128;
    const size_t offset = 1;
    auto alignedPtr = alignedMalloc(ptrBaseSize, MemoryConstants::cacheLineSize);
    auto unalignedPtr = ptrOffset(alignedPtr, offset);

    // No signal event is passed (nullptr).
    immCmdList->appendMemoryCopy(unalignedPtr, unalignedPtr, ptrBaseSize - offset, nullptr, 0, nullptr, false);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, cmdStream->getCpuBase(), cmdStream->getUsed()));

    auto cmdItor = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), cmdItor);

    auto pcCmd = genCmdCast<PIPE_CONTROL *>(*cmdItor);
    ASSERT_NE(nullptr, pcCmd);

    EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_NO_WRITE, pcCmd->getPostSyncOperation());

    // Search for the first MI_STORE_DATA_IMM after the barrier with a bounded
    // find, so the end iterator is never dereferenced when no such command exists.
    auto sdiItor = find<MI_STORE_DATA_IMM *>(++cmdItor, cmdList.end());
    ASSERT_NE(cmdList.end(), sdiItor);

    auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItor);
    ASSERT_NE(nullptr, sdiCmd);

    EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress());
    EXPECT_EQ(0u, sdiCmd->getStoreQword());
    EXPECT_EQ(1u, sdiCmd->getDataDword0());

    alignedFree(alignedPtr);
}
1258+
1259+
// In-order immediate command list, memory copy with a signal event.
// Skips PIPE_CONTROLs that have no post-sync write, then expects an
// MI_STORE_DATA_IMM after the first PIPE_CONTROL with a post-sync operation
// that writes 1 to the in-order dependency counter allocation.
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitWithEventThenSignalCounter, IsAtLeastXeHpCore) {
    using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    auto immCmdList = createImmCmdList<gfxCoreFamily>();

    auto cmdStream = immCmdList->getCmdContainer().getCommandStream();

    auto eventPool = createEvents(1, false);
    auto eventHandle = events[0]->toHandle();

    // Cache-line-aligned allocation shifted by one byte, so the copy works on an
    // unaligned pointer (per the test name, intended to hit the kernel split path).
    const size_t ptrBaseSize = 128;
    const size_t offset = 1;
    auto alignedPtr = alignedMalloc(ptrBaseSize, MemoryConstants::cacheLineSize);
    auto unalignedPtr = ptrOffset(alignedPtr, offset);

    immCmdList->appendMemoryCopy(unalignedPtr, unalignedPtr, ptrBaseSize - offset, eventHandle, 0, nullptr, false);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, cmdStream->getCpuBase(), cmdStream->getUsed()));

    auto cmdItor = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), cmdItor);

    auto pcCmd = genCmdCast<PIPE_CONTROL *>(*cmdItor);
    ASSERT_NE(nullptr, pcCmd);

    // Advance to the first PIPE_CONTROL that carries a post-sync operation
    // (presumably the one signaling the event - confirm against the driver code).
    while (PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_NO_WRITE == pcCmd->getPostSyncOperation()) {
        cmdItor = find<PIPE_CONTROL *>(++cmdItor, cmdList.end());
        ASSERT_NE(cmdList.end(), cmdItor);

        pcCmd = genCmdCast<PIPE_CONTROL *>(*cmdItor);
        ASSERT_NE(nullptr, pcCmd);
    }

    // Search for the first MI_STORE_DATA_IMM after that PIPE_CONTROL with a bounded
    // find, so the end iterator is never dereferenced when no such command exists.
    auto sdiItor = find<MI_STORE_DATA_IMM *>(++cmdItor, cmdList.end());
    ASSERT_NE(cmdList.end(), sdiItor);

    auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItor);
    ASSERT_NE(nullptr, sdiCmd);

    EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress());
    EXPECT_EQ(0u, sdiCmd->getStoreQword());
    EXPECT_EQ(1u, sdiCmd->getDataDword0());

    alignedFree(alignedPtr);
}
1308+
12191309
struct CommandListAppendLaunchKernelWithImplicitArgs : CommandListAppendLaunchKernel {
12201310
template <typename FamilyType>
12211311
uint64_t getIndirectHeapOffsetForImplicitArgsBuffer(const Mock<::L0::Kernel> &kernel) {

0 commit comments

Comments
 (0)