Skip to content

Commit a2386ad

Browse files
Correct programming of implicit args on pre-XeHp platforms
On pre-XeHp platforms, implicit args aren't at the beginning of indirect data; the GPU address of the implicit args buffer is programmed within cross-thread data. Related-To: NEO-5081, IGC-4710. Signed-off-by: Mateusz Jablonski <[email protected]>
1 parent 06a4d2c commit a2386ad

File tree

12 files changed

+230
-61
lines changed

12 files changed

+230
-61
lines changed

level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -660,7 +660,20 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenCommandListWhenAppendLaunchKernelS
660660
EXPECT_EQ(1u, event->getPacketsInUse());
661661
}
662662

663-
HWTEST_F(CommandListAppendLaunchKernel, givenIndirectDispatchWithImplicitArgsWhenAppendingThenMiMathCommandsForWorkGroupCountAndGlobalWorkSizeAndWorkDimAreProgrammed) {
663+
struct CommandListAppendLaunchKernelWithImplicitArgs : CommandListAppendLaunchKernel { // Fixture extending CommandListAppendLaunchKernel with a helper for locating the ImplicitArgs struct in the indirect heap.
664+
665+
template <typename FamilyType>
666+
uint64_t getIndirectHeapOffsetForImplicitArgsBuffer(const Mock<::L0::Kernel> &kernel) { // Returns the byte offset that the test adds to the indirect heap's GPU address to obtain the expected ImplicitArgs address.
667+
if (FamilyType::supportsCmdSet(IGFX_XE_HP_CORE)) { // Families supporting the XE_HP_CORE command set: ImplicitArgs sits at the end of the patched region.
668+
auto implicitArgsProgrammingSize = ImplicitArgsHelper::getSizeForImplicitArgsPatching(kernel.pImplicitArgs.get(), kernel.getKernelDescriptor(), neoDevice->getHardwareInfo());
669+
return implicitArgsProgrammingSize - sizeof(ImplicitArgs); // Total patched size minus the struct itself = offset of the struct.
670+
} else {
671+
return 0u; // Other families: the struct is expected at the very start of the heap.
672+
}
673+
}
674+
};
675+
676+
HWTEST_F(CommandListAppendLaunchKernelWithImplicitArgs, givenIndirectDispatchWithImplicitArgsWhenAppendingThenMiMathCommandsForWorkGroupCountAndGlobalWorkSizeAndWorkDimAreProgrammed) {
664677
using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
665678
using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
666679
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
@@ -669,12 +682,12 @@ HWTEST_F(CommandListAppendLaunchKernel, givenIndirectDispatchWithImplicitArgsWhe
669682
Mock<::L0::Kernel> kernel;
670683
auto pMockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
671684
kernel.module = pMockModule.get();
685+
kernel.immutableData.crossThreadDataSize = sizeof(uint64_t);
672686
kernel.pImplicitArgs.reset(new ImplicitArgs());
687+
UnitTestHelper<FamilyType>::adjustKernelDescriptorForImplicitArgs(*kernel.immutableData.kernelDescriptor);
673688

674689
kernel.setGroupSize(1, 1, 1);
675690

676-
auto implicitArgsProgrammingSize = ImplicitArgsHelper::getSizeForImplicitArgsPatching(kernel.pImplicitArgs.get(), kernel.getKernelDescriptor(), neoDevice->getHardwareInfo());
677-
678691
ze_result_t returnValue;
679692
std::unique_ptr<L0::CommandList> commandList(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
680693

@@ -688,7 +701,8 @@ HWTEST_F(CommandListAppendLaunchKernel, givenIndirectDispatchWithImplicitArgsWhe
688701
nullptr, 0, nullptr);
689702
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
690703
auto heap = commandList->commandContainer.getIndirectHeap(HeapType::INDIRECT_OBJECT);
691-
uint64_t pImplicitArgsGPUVA = heap->getGraphicsAllocation()->getGpuAddress() + implicitArgsProgrammingSize - sizeof(ImplicitArgs);
704+
uint64_t pImplicitArgsGPUVA = heap->getGraphicsAllocation()->getGpuAddress() + getIndirectHeapOffsetForImplicitArgsBuffer<FamilyType>(kernel);
705+
692706
auto workDimStoreRegisterMemCmd = FamilyType::cmdInitStoreRegisterMem;
693707
workDimStoreRegisterMemCmd.setRegisterAddress(CS_GPR_R0);
694708
workDimStoreRegisterMemCmd.setMemoryAddress(pImplicitArgsGPUVA);

level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "shared/source/helpers/per_thread_data.h"
1212
#include "shared/source/utilities/software_tags_manager.h"
1313
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
14+
#include "shared/test/common/helpers/unit_test_helper.h"
1415
#include "shared/test/common/mocks/mock_compilers.h"
1516
#include "shared/test/common/test_macros/test.h"
1617

@@ -937,14 +938,16 @@ struct CmdlistAppendLaunchKernelWithImplicitArgsTests : CmdlistAppendLaunchKerne
937938
expectedImplicitArgs.groupCountZ = 3;
938939
}
939940

941+
template <typename FamilyType>
940942
void dispatchKernelWithImplicitArgs() {
941943
expectedImplicitArgs.globalSizeX = expectedImplicitArgs.localSizeX * expectedImplicitArgs.groupCountX;
942944
expectedImplicitArgs.globalSizeY = expectedImplicitArgs.localSizeY * expectedImplicitArgs.groupCountY;
943945
expectedImplicitArgs.globalSizeZ = expectedImplicitArgs.localSizeZ * expectedImplicitArgs.groupCountZ;
944946

945947
std::unique_ptr<MockImmutableData> mockKernelImmData = std::make_unique<MockImmutableData>(0u);
946948
auto kernelDescriptor = mockKernelImmData->kernelDescriptor;
947-
kernelDescriptor->kernelAttributes.flags.requiresImplicitArgs = true;
949+
950+
UnitTestHelper<FamilyType>::adjustKernelDescriptorForImplicitArgs(*kernelDescriptor);
948951
kernelDescriptor->kernelAttributes.simdSize = expectedImplicitArgs.simdWidth;
949952
kernelDescriptor->kernelAttributes.workgroupDimensionsOrder[0] = workgroupDimOrder[0];
950953
kernelDescriptor->kernelAttributes.workgroupDimensionsOrder[1] = workgroupDimOrder[1];
@@ -957,6 +960,7 @@ struct CmdlistAppendLaunchKernelWithImplicitArgsTests : CmdlistAppendLaunchKerne
957960
kernel->initialize(&kernelDesc);
958961
kernel->kernelRequiresGenerationOfLocalIdsByRuntime = kernelRequiresGenerationOfLocalIdsByRuntime;
959962
kernel->requiredWorkgroupOrder = requiredWorkgroupOrder;
963+
kernel->setCrossThreadData(sizeof(uint64_t));
960964

961965
EXPECT_TRUE(kernel->getKernelDescriptor().kernelAttributes.flags.requiresImplicitArgs);
962966
ASSERT_NE(nullptr, kernel->getImplicitArgs());
@@ -982,7 +986,9 @@ struct CmdlistAppendLaunchKernelWithImplicitArgsTests : CmdlistAppendLaunchKerne
982986
auto sizePerThreadDataForWholeGroup = kernel->getPerThreadDataSizeForWholeThreadGroup();
983987
EXPECT_EQ(indirectHeap->getUsed(), sizeCrossThreadData + sizePerThreadDataForWholeGroup + implicitArgsProgrammingSize);
984988

985-
expectedImplicitArgs.localIdTablePtr = indirectHeapAllocation->getGpuAddress();
989+
if (FamilyType::supportsCmdSet(IGFX_XE_HP_CORE)) {
990+
expectedImplicitArgs.localIdTablePtr = indirectHeapAllocation->getGpuAddress();
991+
}
986992
expectedImplicitArgs.printfBufferPtr = kernel->getPrintfBufferAllocation()->getGpuAddress();
987993
}
988994
std::unique_ptr<L0::CommandList> commandList;
@@ -994,15 +1000,16 @@ struct CmdlistAppendLaunchKernelWithImplicitArgsTests : CmdlistAppendLaunchKerne
9941000
bool kernelRequiresGenerationOfLocalIdsByRuntime = true;
9951001
uint32_t requiredWorkgroupOrder = 0;
9961002
};
997-
HWTEST_F(CmdlistAppendLaunchKernelWithImplicitArgsTests, givenKernelWithImplicitArgsWhenAppendLaunchKernelThenImplicitArgsAreSentToIndirectHeap) {
1003+
1004+
HWCMDTEST_F(IGFX_XE_HP_CORE, CmdlistAppendLaunchKernelWithImplicitArgsTests, givenXeHpAndLaterPlatformWhenAppendLaunchKernelWithImplicitArgsThenImplicitArgsAreSentToIndirectHeapWithLocalIds) {
9981005
std::array<uint16_t, 3> localSize{2, 3, 4};
9991006
size_t totalLocalSize = localSize[0] * localSize[1] * localSize[2];
10001007

10011008
expectedImplicitArgs.localSizeX = localSize[0];
10021009
expectedImplicitArgs.localSizeY = localSize[1];
10031010
expectedImplicitArgs.localSizeZ = localSize[2];
10041011

1005-
dispatchKernelWithImplicitArgs();
1012+
dispatchKernelWithImplicitArgs<FamilyType>();
10061013

10071014
auto grfSize = ImplicitArgsHelper::getGrfSize(expectedImplicitArgs.simdWidth, sizeof(typename FamilyType::GRF));
10081015
auto expectedLocalIds = alignedMalloc(implicitArgsProgrammingSize - sizeof(ImplicitArgs), MemoryConstants::cacheLineSize);
@@ -1017,7 +1024,20 @@ HWTEST_F(CmdlistAppendLaunchKernelWithImplicitArgsTests, givenKernelWithImplicit
10171024
auto implicitArgsInIndirectData = ptrOffset(indirectHeapAllocation->getUnderlyingBuffer(), localIdsProgrammingSize);
10181025
EXPECT_EQ(0, memcmp(implicitArgsInIndirectData, &expectedImplicitArgs, sizeof(ImplicitArgs)));
10191026
}
1020-
HWCMDTEST_F(IGFX_XE_HP_CORE, CmdlistAppendLaunchKernelWithImplicitArgsTests, givenKernelWithImplicitArgsAndHwGeneratedLocalIdsWhenAppendLaunchKernelThenImplicitArgsLocalIdsRespectWalkOrder) {
1027+
1028+
HWCMDTEST_F(IGFX_GEN8_CORE, CmdlistAppendLaunchKernelWithImplicitArgsTests, givenPreXeHpPlatformWhenAppendLaunchKernelWithImplicitArgsThenImplicitArgsAreSentToIndirectHeapWithoutLocalIds) { // Checks the indirect heap layout and the implicit-args pointer written into cross-thread data; per the test name this is the pre-XeHp case.
1029+
dispatchKernelWithImplicitArgs<FamilyType>(); // Fixture helper: dispatches the kernel and fills expectedImplicitArgs.
1030+
1031+
auto implicitArgsInIndirectData = indirectHeapAllocation->getUnderlyingBuffer(); // ImplicitArgs is expected at offset 0 of the heap buffer (no local ids in front of it).
1032+
EXPECT_EQ(0, memcmp(implicitArgsInIndirectData, &expectedImplicitArgs, sizeof(ImplicitArgs)));
1033+
1034+
auto crossThreadDataInIndirectData = ptrOffset(indirectHeapAllocation->getUnderlyingBuffer(), 0x80); // NOTE(review): 0x80 is presumably the size reserved for implicit args before cross-thread data starts — confirm against getSizeForImplicitArgsPatching.
1035+
1036+
auto programmedImplicitArgsGpuVA = reinterpret_cast<uint64_t *>(crossThreadDataInIndirectData)[0]; // Read the first 64-bit value stored at that offset.
1037+
EXPECT_EQ(indirectHeapAllocation->getGpuAddress(), programmedImplicitArgsGpuVA); // It must equal the heap's base GPU address, i.e. where ImplicitArgs was found above.
1038+
}
1039+
1040+
HWCMDTEST_F(IGFX_XE_HP_CORE, CmdlistAppendLaunchKernelWithImplicitArgsTests, givenXeHpAndLaterPlatformAndHwGeneratedLocalIdsWhenAppendLaunchKernelWithImplicitArgsThenImplicitArgsLocalIdsRespectWalkOrder) {
10211041
workgroupDimOrder[0] = 2;
10221042
workgroupDimOrder[1] = 1;
10231043
workgroupDimOrder[2] = 0;
@@ -1034,7 +1054,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CmdlistAppendLaunchKernelWithImplicitArgsTests, giv
10341054
expectedImplicitArgs.localSizeY = localSize[1];
10351055
expectedImplicitArgs.localSizeZ = localSize[2];
10361056

1037-
dispatchKernelWithImplicitArgs();
1057+
dispatchKernelWithImplicitArgs<FamilyType>();
10381058

10391059
auto grfSize = ImplicitArgsHelper::getGrfSize(expectedImplicitArgs.simdWidth, sizeof(typename FamilyType::GRF));
10401060
auto expectedLocalIds = alignedMalloc(implicitArgsProgrammingSize - sizeof(ImplicitArgs), MemoryConstants::cacheLineSize);
@@ -1050,7 +1070,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CmdlistAppendLaunchKernelWithImplicitArgsTests, giv
10501070
EXPECT_EQ(0, memcmp(implicitArgsInIndirectData, &expectedImplicitArgs, sizeof(ImplicitArgs)));
10511071
}
10521072

1053-
HWCMDTEST_F(IGFX_XE_HP_CORE, CmdlistAppendLaunchKernelWithImplicitArgsTests, givenKernelWithImplicitArgsWhenAppendLaunchKernelWithSimd1ThenLocalIdsAreGeneratedCorrectly) {
1073+
HWCMDTEST_F(IGFX_XE_HP_CORE, CmdlistAppendLaunchKernelWithImplicitArgsTests, givenXeHpAndLaterPlatformWhenAppendLaunchKernelWithImplicitArgsAndSimd1ThenLocalIdsAreGeneratedCorrectly) {
10541074
workgroupDimOrder[0] = 2;
10551075
workgroupDimOrder[1] = 1;
10561076
workgroupDimOrder[2] = 0;
@@ -1060,7 +1080,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CmdlistAppendLaunchKernelWithImplicitArgsTests, giv
10601080
expectedImplicitArgs.localSizeY = 2;
10611081
expectedImplicitArgs.localSizeZ = 1;
10621082

1063-
dispatchKernelWithImplicitArgs();
1083+
dispatchKernelWithImplicitArgs<FamilyType>();
10641084

10651085
uint16_t expectedLocalIds[][3] = {{0, 0, 0},
10661086
{0, 1, 0},

opencl/source/helpers/hardware_commands_helper_bdw_and_later.inl

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -112,24 +112,21 @@ size_t HardwareCommandsHelper<GfxFamily>::sendCrossThreadData(
112112
uint32_t &sizeCrossThreadData) {
113113
indirectHeap.align(WALKER_TYPE::INDIRECTDATASTARTADDRESS_ALIGN_SIZE);
114114

115-
auto offsetCrossThreadData = indirectHeap.getUsed();
116-
char *pDest = nullptr;
117-
118115
auto pImplicitArgs = kernel.getImplicitArgs();
119116
if (pImplicitArgs) {
120-
pImplicitArgs->localIdTablePtr = indirectHeap.getGraphicsAllocation()->getGpuAddress() + offsetCrossThreadData;
121-
122117
const auto &kernelDescriptor = kernel.getDescriptor();
123118
const auto &hwInfo = kernel.getHardwareInfo();
124119
auto sizeForImplicitArgsProgramming = ImplicitArgsHelper::getSizeForImplicitArgsPatching(pImplicitArgs, kernelDescriptor, hwInfo);
125120

126-
auto sizeForLocalIdsProgramming = sizeForImplicitArgsProgramming - sizeof(ImplicitArgs);
127-
offsetCrossThreadData += sizeForLocalIdsProgramming;
128-
121+
auto implicitArgsGpuVA = indirectHeap.getGraphicsAllocation()->getGpuAddress() + indirectHeap.getUsed();
129122
auto ptrToPatchImplicitArgs = indirectHeap.getSpace(sizeForImplicitArgsProgramming);
130-
131123
ImplicitArgsHelper::patchImplicitArgs(ptrToPatchImplicitArgs, *pImplicitArgs, kernelDescriptor, hwInfo, {});
124+
125+
auto implicitArgsCrossThreadPtr = ptrOffset(reinterpret_cast<uint64_t *>(kernel.getCrossThreadData()), kernelDescriptor.payloadMappings.implicitArgs.implcitArgsBuffer);
126+
*implicitArgsCrossThreadPtr = implicitArgsGpuVA;
132127
}
128+
auto offsetCrossThreadData = indirectHeap.getUsed();
129+
char *pDest = nullptr;
133130

134131
pDest = static_cast<char *>(indirectHeap.getSpace(sizeCrossThreadData));
135132
memcpy_s(pDest, sizeCrossThreadData, kernel.getCrossThreadData(), sizeCrossThreadData);

opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1566,7 +1566,7 @@ HWTEST_F(DispatchWalkerTest, WhenKernelRequiresImplicitArgsThenIohRequiresMoreSp
15661566
MockKernel kernelWithoutImplicitArgs(program.get(), kernelInfo, *pClDevice);
15671567
ASSERT_EQ(CL_SUCCESS, kernelWithoutImplicitArgs.initialize());
15681568

1569-
kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs = true;
1569+
UnitTestHelper<FamilyType>::adjustKernelDescriptorForImplicitArgs(kernelInfo.kernelDescriptor);
15701570
MockKernel kernelWithImplicitArgs(program.get(), kernelInfo, *pClDevice);
15711571
ASSERT_EQ(CL_SUCCESS, kernelWithImplicitArgs.initialize());
15721572

@@ -1612,11 +1612,9 @@ HWTEST_F(DispatchWalkerTest, WhenKernelRequiresImplicitArgsThenIohRequiresMoreSp
16121612
auto numChannels = kernelInfo.kernelDescriptor.kernelAttributes.numLocalIdChannels;
16131613
auto simdSize = kernelInfo.getMaxSimdSize();
16141614
uint32_t grfSize = sizeof(typename FamilyType::GRF);
1615-
auto grfSizeForImplicitArgs = ImplicitArgsHelper::getGrfSize(simdSize, grfSize);
16161615
auto size = kernelWithImplicitArgs.getCrossThreadDataSize() +
16171616
HardwareCommandsHelper<FamilyType>::getPerThreadDataSizeTotal(simdSize, grfSize, numChannels, Math::computeTotalElementsCount(localWorkgroupSize)) +
1618-
sizeof(ImplicitArgs) +
1619-
alignUp(HardwareCommandsHelper<FamilyType>::getPerThreadDataSizeTotal(simdSize, grfSizeForImplicitArgs, 3u, Math::computeTotalElementsCount(localWorkgroupSize)), MemoryConstants::cacheLineSize);
1617+
ImplicitArgsHelper::getSizeForImplicitArgsPatching(kernelWithImplicitArgs.getImplicitArgs(), kernelWithImplicitArgs.getDescriptor(), *defaultHwInfo);
16201618

16211619
size = alignUp(size, MemoryConstants::cacheLineSize);
16221620
EXPECT_EQ(size, iohSizeWithImplicitArgs);

opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -633,6 +633,7 @@ HWTEST_P(EnqueueKernelPrintfTest, GivenKernelWithPrintfBlockedByEventWhenEventUn
633633
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.flags.usesPrintf = false;
634634
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.flags.usesStringMapForPrintf = true;
635635
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.binaryFormat = DeviceBinaryFormat::Patchtokens;
636+
UnitTestHelper<FamilyType>::adjustKernelDescriptorForImplicitArgs(mockKernel.kernelInfo.kernelDescriptor);
636637
mockKernel.mockKernel->pImplicitArgs = std::make_unique<ImplicitArgs>();
637638
*mockKernel.mockKernel->pImplicitArgs = {};
638639

@@ -678,7 +679,7 @@ HWTEST_P(EnqueueKernelPrintfTest, GivenKernelWithPrintfWithStringMapDisbaledAndI
678679
mockKernel.kernelInfo.addToPrintfStringsMap(0, testString);
679680
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.flags.usesPrintf = false;
680681
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.flags.usesStringMapForPrintf = false;
681-
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs = true;
682+
UnitTestHelper<FamilyType>::adjustKernelDescriptorForImplicitArgs(mockKernel.kernelInfo.kernelDescriptor);
682683
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.binaryFormat = DeviceBinaryFormat::Patchtokens;
683684
mockKernel.mockKernel->pImplicitArgs = std::make_unique<ImplicitArgs>();
684685
*mockKernel.mockKernel->pImplicitArgs = {};

opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "shared/source/os_interface/os_context.h"
1515
#include "shared/test/common/cmd_parse/hw_parse.h"
1616
#include "shared/test/common/helpers/debug_manager_state_restore.h"
17+
#include "shared/test/common/helpers/unit_test_helper.h"
1718
#include "shared/test/common/mocks/mock_allocation_properties.h"
1819
#include "shared/test/common/mocks/mock_graphics_allocation.h"
1920
#include "shared/test/common/test_macros/test_checks_shared.h"
@@ -1240,7 +1241,7 @@ struct HardwareCommandsImplicitArgsTests : Test<ClDeviceFixture> {
12401241

12411242
auto pKernelInfo = std::make_unique<MockKernelInfo>();
12421243
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = expectedImplicitArgs.simdWidth;
1243-
pKernelInfo->kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs = true;
1244+
UnitTestHelper<FamilyType>::adjustKernelDescriptorForImplicitArgs(pKernelInfo->kernelDescriptor);
12441245
pKernelInfo->kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[0] = workgroupDimOrder[0];
12451246
pKernelInfo->kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[1] = workgroupDimOrder[1];
12461247
pKernelInfo->kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[2] = workgroupDimOrder[2];
@@ -1254,6 +1255,8 @@ struct HardwareCommandsImplicitArgsTests : Test<ClDeviceFixture> {
12541255

12551256
ASSERT_NE(nullptr, pImplicitArgs);
12561257

1258+
kernel.setCrossThreadData(nullptr, sizeof(uint64_t));
1259+
12571260
kernel.setWorkDim(expectedImplicitArgs.numWorkDim);
12581261
kernel.setLocalWorkSizeValues(expectedImplicitArgs.localSizeX, expectedImplicitArgs.localSizeY, expectedImplicitArgs.localSizeZ);
12591262
kernel.setGlobalWorkSizeValues(static_cast<uint32_t>(expectedImplicitArgs.globalSizeX), static_cast<uint32_t>(expectedImplicitArgs.globalSizeY), static_cast<uint32_t>(expectedImplicitArgs.globalSizeZ));
@@ -1272,7 +1275,9 @@ struct HardwareCommandsImplicitArgsTests : Test<ClDeviceFixture> {
12721275

12731276
EXPECT_LE(implicitArgsProgrammingSize, indirectHeap.getUsed());
12741277

1275-
expectedImplicitArgs.localIdTablePtr = indirectHeapAllocation->getGpuAddress();
1278+
if (FamilyType::supportsCmdSet(IGFX_XE_HP_CORE)) {
1279+
expectedImplicitArgs.localIdTablePtr = indirectHeapAllocation->getGpuAddress();
1280+
}
12761281
}
12771282

12781283
ImplicitArgs expectedImplicitArgs = {sizeof(ImplicitArgs)};
@@ -1281,14 +1286,26 @@ struct HardwareCommandsImplicitArgsTests : Test<ClDeviceFixture> {
12811286
uint32_t implicitArgsProgrammingSize = 0u;
12821287
};
12831288

1284-
HWTEST_F(HardwareCommandsImplicitArgsTests, givenKernelWithImplicitArgsWhenSendingCrossThreadDataThenImplicitArgsAreSetAtTheBeginningOfIndirectData) {
1289+
HWCMDTEST_F(IGFX_XE_HP_CORE, HardwareCommandsImplicitArgsTests, givenXeHpAndLaterPlatformWhenSendingIndirectStateForKernelWithImplicitArgsThenImplicitArgsAreSentToIndirectHeapWithLocalIds) { // Checks that ImplicitArgs is found in the indirect heap after the local-ids region.
12851290
dispatchKernelWithImplicitArgs<FamilyType>();
12861291

12871292
auto localIdsProgrammingSize = implicitArgsProgrammingSize - sizeof(ImplicitArgs); // Everything in the patched region except the struct itself.
12881293
auto implicitArgsInIndirectData = ptrOffset(indirectHeapAllocation->getUnderlyingBuffer(), localIdsProgrammingSize); // Skip that region to reach the struct.
12891294
EXPECT_EQ(0, memcmp(implicitArgsInIndirectData, &expectedImplicitArgs, sizeof(ImplicitArgs))); // Byte-wise comparison against the expected struct.
12901295
}
12911296

1297+
HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsImplicitArgsTests, givenPreXeHpPlatformWhenSendingIndirectStateForKernelWithImplicitArgsThenImplicitArgsAreSentToIndirectHeapWithoutLocalIds) { // Checks the indirect heap layout and the implicit-args pointer written into cross-thread data; per the test name this is the pre-XeHp case.
1298+
dispatchKernelWithImplicitArgs<FamilyType>();
1299+
1300+
auto implicitArgsInIndirectData = indirectHeapAllocation->getUnderlyingBuffer(); // ImplicitArgs is expected at offset 0 of the heap buffer (no local ids in front of it).
1301+
EXPECT_EQ(0, memcmp(implicitArgsInIndirectData, &expectedImplicitArgs, sizeof(ImplicitArgs)));
1302+
1303+
auto crossThreadDataInIndirectData = ptrOffset(indirectHeapAllocation->getUnderlyingBuffer(), 0x80); // NOTE(review): 0x80 is presumably the size reserved for implicit args before cross-thread data starts — confirm against getSizeForImplicitArgsPatching.
1304+
1305+
auto programmedImplicitArgsGpuVA = reinterpret_cast<uint64_t *>(crossThreadDataInIndirectData)[0]; // Read the first 64-bit value stored at that offset.
1306+
EXPECT_EQ(indirectHeapAllocation->getGpuAddress(), programmedImplicitArgsGpuVA); // It must equal the heap's base GPU address, i.e. where ImplicitArgs was found above.
1307+
}
1308+
12921309
HWCMDTEST_F(IGFX_XE_HP_CORE, HardwareCommandsImplicitArgsTests, givenKernelWithImplicitArgsAndRuntimeLocalIdsGenerationWhenSendingIndirectStateThenLocalIdsAreGeneratedAndCorrectlyProgrammedInCrossThreadData) {
12931310
DebugManagerStateRestore restorer;
12941311
DebugManager.flags.EnableHwGenerationLocalIds.set(0);

0 commit comments

Comments
 (0)