Skip to content

Commit c93998b

Browse files
performance: Do not program additional synchronization on integrated
Related-To: NEO-12324
Signed-off-by: Lukasz Jobczyk <[email protected]>
1 parent c98b0c3 commit c93998b

File tree

6 files changed

+65
-44
lines changed

6 files changed

+65
-44
lines changed

opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1255,7 +1255,7 @@ HWTEST_F(BcsTests, givenBltSizeWithLeftoverWhenDispatchedThenProgramAllRequiredC
12551255
auto miSemaphoreWaitCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*(cmdIterator++));
12561256
EXPECT_NE(nullptr, miSemaphoreWaitCmd);
12571257
EXPECT_TRUE(UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWait(*miSemaphoreWaitCmd));
1258-
} else {
1258+
} else if (MemorySynchronizationCommands<FamilyType>::getSizeForSingleAdditionalSynchronization(pDevice->getRootDeviceEnvironment()) > 0) {
12591259
cmdIterator++;
12601260
}
12611261
}
@@ -1286,7 +1286,7 @@ HWTEST_F(BcsTests, givenBltSizeWithLeftoverWhenDispatchedThenProgramAllRequiredC
12861286
auto miSemaphoreWaitCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*(cmdIterator++));
12871287
EXPECT_NE(nullptr, miSemaphoreWaitCmd);
12881288
EXPECT_TRUE(UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWait(*miSemaphoreWaitCmd));
1289-
} else {
1289+
} else if (MemorySynchronizationCommands<FamilyType>::getSizeForSingleAdditionalSynchronization(pDevice->getRootDeviceEnvironment()) > 0) {
12901290
cmdIterator++;
12911291
}
12921292
}

opencl/test/unit_test/xe2_hpg_core/command_stream_receiver_hw_tests_xe2_hpg_core.cpp

Lines changed: 35 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -396,11 +396,13 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
396396
auto &postSyncData = walkerCmd->getPostSync();
397397
EXPECT_FALSE(postSyncData.getSystemMemoryFenceRequest());
398398

399-
auto itorMiMemFence = find<MI_MEM_FENCE *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
400-
ASSERT_NE(hwParser.cmdList.end(), itorMiMemFence);
401-
auto fenceCmd = genCmdCast<MI_MEM_FENCE *>(*itorMiMemFence);
402-
ASSERT_NE(nullptr, fenceCmd);
403-
EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getFenceType());
399+
if (MemorySynchronizationCommands<FamilyType>::getSizeForAdditonalSynchronization(pClDevice->getRootDeviceEnvironment()) > 0) {
400+
auto itorMiMemFence = find<MI_MEM_FENCE *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
401+
ASSERT_NE(hwParser.cmdList.end(), itorMiMemFence);
402+
auto fenceCmd = genCmdCast<MI_MEM_FENCE *>(*itorMiMemFence);
403+
ASSERT_NE(nullptr, fenceCmd);
404+
EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getFenceType());
405+
}
404406
}
405407

406408
XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
@@ -432,11 +434,13 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
432434
auto &postSyncData = walkerCmd->getPostSync();
433435
EXPECT_FALSE(postSyncData.getSystemMemoryFenceRequest());
434436

435-
auto itorMiMemFence = find<MI_MEM_FENCE *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
436-
ASSERT_NE(hwParser.cmdList.end(), itorMiMemFence);
437-
auto fenceCmd = genCmdCast<MI_MEM_FENCE *>(*itorMiMemFence);
438-
ASSERT_NE(nullptr, fenceCmd);
439-
EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getFenceType());
437+
if (MemorySynchronizationCommands<FamilyType>::getSizeForAdditonalSynchronization(pClDevice->getRootDeviceEnvironment()) > 0) {
438+
auto itorMiMemFence = find<MI_MEM_FENCE *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
439+
ASSERT_NE(hwParser.cmdList.end(), itorMiMemFence);
440+
auto fenceCmd = genCmdCast<MI_MEM_FENCE *>(*itorMiMemFence);
441+
ASSERT_NE(nullptr, fenceCmd);
442+
EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getFenceType());
443+
}
440444
}
441445

442446
XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
@@ -468,11 +472,13 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
468472
auto &postSyncData = walkerCmd->getPostSync();
469473
EXPECT_FALSE(postSyncData.getSystemMemoryFenceRequest());
470474

471-
auto itorMiMemFence = find<MI_MEM_FENCE *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
472-
ASSERT_NE(hwParser.cmdList.end(), itorMiMemFence);
473-
auto fenceCmd = genCmdCast<MI_MEM_FENCE *>(*itorMiMemFence);
474-
ASSERT_NE(nullptr, fenceCmd);
475-
EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getFenceType());
475+
if (MemorySynchronizationCommands<FamilyType>::getSizeForAdditonalSynchronization(pClDevice->getRootDeviceEnvironment()) > 0) {
476+
auto itorMiMemFence = find<MI_MEM_FENCE *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
477+
ASSERT_NE(hwParser.cmdList.end(), itorMiMemFence);
478+
auto fenceCmd = genCmdCast<MI_MEM_FENCE *>(*itorMiMemFence);
479+
ASSERT_NE(nullptr, fenceCmd);
480+
EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getFenceType());
481+
}
476482

477483
auto event = castToObject<Event>(kernelEvent);
478484
event->release();
@@ -508,11 +514,13 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
508514
auto &postSyncData = walkerCmd->getPostSync();
509515
EXPECT_TRUE(postSyncData.getSystemMemoryFenceRequest());
510516

511-
auto itorMiMemFence = find<MI_MEM_FENCE *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
512-
ASSERT_NE(hwParser.cmdList.end(), itorMiMemFence);
513-
auto fenceCmd = genCmdCast<MI_MEM_FENCE *>(*itorMiMemFence);
514-
ASSERT_NE(nullptr, fenceCmd);
515-
EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getFenceType());
517+
if (MemorySynchronizationCommands<FamilyType>::getSizeForAdditonalSynchronization(pClDevice->getRootDeviceEnvironment()) > 0) {
518+
auto itorMiMemFence = find<MI_MEM_FENCE *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
519+
ASSERT_NE(hwParser.cmdList.end(), itorMiMemFence);
520+
auto fenceCmd = genCmdCast<MI_MEM_FENCE *>(*itorMiMemFence);
521+
ASSERT_NE(nullptr, fenceCmd);
522+
EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getFenceType());
523+
}
516524

517525
auto event = castToObject<Event>(kernelEvent);
518526
event->release();
@@ -549,11 +557,13 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
549557
auto &postSyncData = walkerCmd->getPostSync();
550558
EXPECT_TRUE(postSyncData.getSystemMemoryFenceRequest());
551559

552-
auto itorMiMemFence = find<MI_MEM_FENCE *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
553-
ASSERT_NE(hwParser.cmdList.end(), itorMiMemFence);
554-
auto fenceCmd = genCmdCast<MI_MEM_FENCE *>(*itorMiMemFence);
555-
ASSERT_NE(nullptr, fenceCmd);
556-
EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getFenceType());
560+
if (MemorySynchronizationCommands<FamilyType>::getSizeForAdditonalSynchronization(pClDevice->getRootDeviceEnvironment()) > 0) {
561+
auto itorMiMemFence = find<MI_MEM_FENCE *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
562+
ASSERT_NE(hwParser.cmdList.end(), itorMiMemFence);
563+
auto fenceCmd = genCmdCast<MI_MEM_FENCE *>(*itorMiMemFence);
564+
ASSERT_NE(nullptr, fenceCmd);
565+
EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getFenceType());
566+
}
557567

558568
auto event = castToObject<Event>(kernelEvent);
559569
event->release();

shared/source/helpers/gfx_core_helper.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -483,6 +483,11 @@ struct MemorySynchronizationCommands {
483483

484484
static void setBarrierWaFlags(void *barrierCmd);
485485

486+
enum class AdditionalSynchronizationType : uint32_t {
487+
semaphore = 0,
488+
fence,
489+
none
490+
};
486491
static void addAdditionalSynchronizationForDirectSubmission(LinearStream &commandStream, uint64_t gpuAddress, bool acquire, const RootDeviceEnvironment &rootDeviceEnvironment);
487492
static void addAdditionalSynchronization(LinearStream &commandStream, uint64_t gpuAddress, bool acquire, const RootDeviceEnvironment &rootDeviceEnvironment);
488493
static void setAdditionalSynchronization(void *&commandsBuffer, uint64_t gpuAddress, bool acquire, const RootDeviceEnvironment &rootDeviceEnvironment);

shared/source/xe2_hpg_core/gfx_core_helper_xe2_hpg_core.cpp

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -166,27 +166,28 @@ aub_stream::MMIOList GfxCoreHelperHw<Family>::getExtraMmioList(const HardwareInf
166166

167167
template <>
168168
size_t MemorySynchronizationCommands<Family>::getSizeForSingleAdditionalSynchronization(const RootDeviceEnvironment &rootDeviceEnvironment) {
169-
auto programGlobalFenceAsMiMemFenceCommandInCommandStream = true;
169+
auto programGlobalFenceAsMiMemFenceCommandInCommandStream = rootDeviceEnvironment.getHardwareInfo()->capabilityTable.isIntegratedDevice ? AdditionalSynchronizationType::none : AdditionalSynchronizationType::fence;
170170
if (debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.get() != -1) {
171-
programGlobalFenceAsMiMemFenceCommandInCommandStream = !!debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.get();
171+
programGlobalFenceAsMiMemFenceCommandInCommandStream = static_cast<AdditionalSynchronizationType>(debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.get());
172172
}
173173

174-
if (programGlobalFenceAsMiMemFenceCommandInCommandStream) {
174+
if (programGlobalFenceAsMiMemFenceCommandInCommandStream == AdditionalSynchronizationType::fence) {
175175
return sizeof(Family::MI_MEM_FENCE);
176-
} else {
176+
} else if (programGlobalFenceAsMiMemFenceCommandInCommandStream == AdditionalSynchronizationType::semaphore) {
177177
return EncodeSemaphore<Family>::getSizeMiSemaphoreWait();
178178
}
179+
return 0;
179180
}
180181

181182
template <>
182183
void MemorySynchronizationCommands<Family>::setAdditionalSynchronization(void *&commandsBuffer, uint64_t gpuAddress, bool acquire, const RootDeviceEnvironment &rootDeviceEnvironment) {
183184
using MI_MEM_FENCE = typename Family::MI_MEM_FENCE;
184185
using MI_SEMAPHORE_WAIT = typename Family::MI_SEMAPHORE_WAIT;
185-
auto programGlobalFenceAsMiMemFenceCommandInCommandStream = true;
186+
auto programGlobalFenceAsMiMemFenceCommandInCommandStream = rootDeviceEnvironment.getHardwareInfo()->capabilityTable.isIntegratedDevice ? AdditionalSynchronizationType::none : AdditionalSynchronizationType::fence;
186187
if (debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.get() != -1) {
187-
programGlobalFenceAsMiMemFenceCommandInCommandStream = !!debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.get();
188+
programGlobalFenceAsMiMemFenceCommandInCommandStream = static_cast<AdditionalSynchronizationType>(debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.get());
188189
}
189-
if (programGlobalFenceAsMiMemFenceCommandInCommandStream) {
190+
if (programGlobalFenceAsMiMemFenceCommandInCommandStream == AdditionalSynchronizationType::fence) {
190191
MI_MEM_FENCE miMemFence = Family::cmdInitMemFence;
191192
if (acquire) {
192193
miMemFence.setFenceType(Family::MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_ACQUIRE);
@@ -195,7 +196,7 @@ void MemorySynchronizationCommands<Family>::setAdditionalSynchronization(void *&
195196
}
196197
*reinterpret_cast<MI_MEM_FENCE *>(commandsBuffer) = miMemFence;
197198
commandsBuffer = ptrOffset(commandsBuffer, sizeof(MI_MEM_FENCE));
198-
} else {
199+
} else if (programGlobalFenceAsMiMemFenceCommandInCommandStream == AdditionalSynchronizationType::semaphore) {
199200
EncodeSemaphore<Family>::programMiSemaphoreWait(reinterpret_cast<MI_SEMAPHORE_WAIT *>(commandsBuffer),
200201
gpuAddress,
201202
EncodeSemaphore<Family>::invalidHardwareTag,

shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3033,7 +3033,10 @@ HWTEST2_F(CommandStreamReceiverHwTest, givenDeviceToHostCopyWhenFenceIsRequiredT
30333033
auto miMemFence = genCmdCast<typename FamilyType::MI_MEM_FENCE *>(*++cmdIterator);
30343034

30353035
fenceExpected &= getHelper<ProductHelper>().isDeviceToHostCopySignalingFenceRequired();
3036-
size_t expectedFenceCount = fenceExpected ? 3 : 2;
3036+
size_t expectedFenceCount = fenceExpected ? 1 : 0;
3037+
if (!pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
3038+
expectedFenceCount += 2;
3039+
}
30373040

30383041
auto fences = findAll<typename FamilyType::MI_MEM_FENCE *>(cmdIterator, cmdList.end());
30393042
EXPECT_EQ(expectedFenceCount, fences.size());

shared/test/unit_test/xe2_hpg_core/gfx_core_helper_tests_xe2_hpg_core.cpp

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -417,15 +417,15 @@ XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenGfxCoreHelperWhenAskedIfFe
417417
XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenDefaultMemorySynchronizationCommandsWhenGettingSizeForAdditionalSynchronizationThenCorrectValueIsReturned) {
418418
using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE;
419419

420-
EXPECT_EQ(sizeof(MI_MEM_FENCE), MemorySynchronizationCommands<FamilyType>::getSizeForAdditonalSynchronization(pDevice->getRootDeviceEnvironment()));
420+
EXPECT_EQ(!pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice * sizeof(MI_MEM_FENCE), MemorySynchronizationCommands<FamilyType>::getSizeForAdditonalSynchronization(pDevice->getRootDeviceEnvironment()));
421421
}
422422

423423
XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenDebugMemorySynchronizationCommandsWhenGettingSizeForAdditionalSynchronizationThenCorrectValueIsReturned) {
424424
DebugManagerStateRestore restorer;
425425
debugManager.flags.DisablePipeControlPrecedingPostSyncCommand.set(1);
426426
using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE;
427427

428-
EXPECT_EQ(2 * sizeof(MI_MEM_FENCE), MemorySynchronizationCommands<FamilyType>::getSizeForAdditonalSynchronization(pDevice->getRootDeviceEnvironment()));
428+
EXPECT_EQ(!pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice * 2 * sizeof(MI_MEM_FENCE), MemorySynchronizationCommands<FamilyType>::getSizeForAdditonalSynchronization(pDevice->getRootDeviceEnvironment()));
429429
}
430430

431431
XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenDontProgramGlobalFenceAsMiMemFenceCommandInCommandStreamWhenGettingSizeForAdditionalSynchronizationThenCorrectValueIsReturned) {
@@ -456,12 +456,14 @@ XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenDefaultMemorySynchronizati
456456

457457
MemorySynchronizationCommands<FamilyType>::addAdditionalSynchronization(commandStream, 0x0, false, rootDeviceEnvironment);
458458

459-
HardwareParse hwParser;
460-
hwParser.parseCommands<FamilyType>(commandStream);
461-
EXPECT_EQ(1u, hwParser.cmdList.size());
462-
auto fenceCmd = genCmdCast<MI_MEM_FENCE *>(*hwParser.cmdList.begin());
463-
ASSERT_NE(nullptr, fenceCmd);
464-
EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getFenceType());
459+
if (MemorySynchronizationCommands<FamilyType>::getSizeForAdditonalSynchronization(rootDeviceEnvironment) > 0) {
460+
HardwareParse hwParser;
461+
hwParser.parseCommands<FamilyType>(commandStream);
462+
EXPECT_EQ(1u, hwParser.cmdList.size());
463+
auto fenceCmd = genCmdCast<MI_MEM_FENCE *>(*hwParser.cmdList.begin());
464+
ASSERT_NE(nullptr, fenceCmd);
465+
EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getFenceType());
466+
}
465467
}
466468

467469
XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenDontProgramGlobalFenceAsMiMemFenceCommandInCommandStreamWhenAddingAdditionalSynchronizationThenSemaphoreWaitIsCalled) {

0 commit comments

Comments
 (0)