Skip to content

Commit 2b89486

Browse files
Program Semaphore to keep dependency on previous enqueue
Change-Id: I511f39811769f1add179ea5d9cb331fa9c5ccec2
Signed-off-by: Dunajski, Bartosz <[email protected]>
1 parent 581805c commit 2b89486

File tree

8 files changed: +132 -19 lines changed

8 files changed

+132
-19
lines changed

runtime/command_queue/enqueue_common.h

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -241,10 +241,17 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
241241
}
242242
}
243243

244-
TimestampPacket *timestampPacket = nullptr;
244+
TimestampPacket *currentTimestampPacket = nullptr;
245+
TimestampPacket *previousTimestampPacket = nullptr;
245246
if (device->getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
247+
auto previousTimestampPacketNode = timestampPacketNode;
246248
obtainNewTimestampPacketNode();
247-
timestampPacket = timestampPacketNode->tag;
249+
currentTimestampPacket = timestampPacketNode->tag;
250+
251+
if (previousTimestampPacketNode && !previousTimestampPacketNode->tag->canBeReleased()) {
252+
// keep dependency on previous enqueue
253+
previousTimestampPacket = previousTimestampPacketNode->tag;
254+
}
248255
}
249256

250257
if (eventBuilder.getEvent()) {
@@ -281,7 +288,8 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
281288
&blockedCommandsData,
282289
hwTimeStamps,
283290
hwPerfCounter,
284-
timestampPacket,
291+
previousTimestampPacket,
292+
currentTimestampPacket,
285293
preemption,
286294
blockQueue,
287295
commandType);

runtime/command_queue/gpgpu_walker.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,8 @@ class GpgpuWalkerHelper {
206206
KernelOperation **blockedCommandsData,
207207
HwTimeStamps *hwTimeStamps,
208208
OCLRT::HwPerfCounter *hwPerfCounter,
209-
TimestampPacket *timestampPacket,
209+
TimestampPacket *previousTimestampPacket,
210+
TimestampPacket *currentTimestampPacket,
210211
PreemptionMode preemptionMode,
211212
bool blockQueue,
212213
uint32_t commandType = 0);
@@ -297,7 +298,7 @@ LinearStream &getCommandStream(CommandQueue &commandQueue, cl_uint numEventsInWa
297298
}
298299
if (commandQueue.getDevice().getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
299300
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredForTimestampPacketWrite();
300-
expectedSizeCS += numEventsInWaitList * sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT);
301+
expectedSizeCS += (numEventsInWaitList + 1) * sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT);
301302
}
302303
return commandQueue.getCS(expectedSizeCS);
303304
}

runtime/command_queue/gpgpu_walker.inl

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -435,7 +435,8 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchWalker(
435435
KernelOperation **blockedCommandsData,
436436
HwTimeStamps *hwTimeStamps,
437437
OCLRT::HwPerfCounter *hwPerfCounter,
438-
TimestampPacket *timestampPacket,
438+
TimestampPacket *previousTimestampPacket,
439+
TimestampPacket *currentTimestampPacket,
439440
PreemptionMode preemptionMode,
440441
bool blockQueue,
441442
uint32_t commandType) {
@@ -497,6 +498,10 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchWalker(
497498
if (commandQueue.getDevice().getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
498499
GpgpuWalkerHelper<GfxFamily>::dispatchOnDeviceWaitlistSemaphores(commandStream, commandQueue.getDevice(),
499500
numEventsInWaitList, eventWaitList);
501+
if (previousTimestampPacket) {
502+
auto compareAddress = previousTimestampPacket->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextEnd);
503+
KernelCommandsHelper<GfxFamily>::programMiSemaphoreWait(*commandStream, compareAddress, 1);
504+
}
500505
}
501506

502507
dsh->align(KernelCommandsHelper<GfxFamily>::alignInterfaceDescriptorData);
@@ -590,9 +595,9 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchWalker(
590595

591596
dispatchWorkarounds(commandStream, commandQueue, kernel, true);
592597

593-
bool setupTimestampPacket = timestampPacket && (currentDispatchIndex == multiDispatchInfo.size() - 1);
598+
bool setupTimestampPacket = currentTimestampPacket && (currentDispatchIndex == multiDispatchInfo.size() - 1);
594599
if (setupTimestampPacket) {
595-
GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(commandStream, nullptr, timestampPacket,
600+
GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(commandStream, nullptr, currentTimestampPacket,
596601
TimestampPacket::WriteOperationType::BeforeWalker);
597602
}
598603

@@ -601,7 +606,7 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchWalker(
601606
*pWalkerCmd = GfxFamily::cmdInitGpgpuWalker;
602607

603608
if (setupTimestampPacket) {
604-
GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(commandStream, pWalkerCmd, timestampPacket,
609+
GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(commandStream, pWalkerCmd, currentTimestampPacket,
605610
TimestampPacket::WriteOperationType::AfterWalker);
606611
}
607612

unit_tests/command_queue/dispatch_walker_tests.cpp

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, shouldntChangeCommandStreamMemor
152152
nullptr,
153153
nullptr,
154154
nullptr,
155+
nullptr,
155156
pDevice->getPreemptionMode(),
156157
false);
157158

@@ -200,6 +201,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, noLocalIdsShouldntCrash) {
200201
nullptr,
201202
nullptr,
202203
nullptr,
204+
nullptr,
203205
pDevice->getPreemptionMode(),
204206
false);
205207

@@ -230,6 +232,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterWorkDimensionswithDefaultLwsAlgorithm)
230232
nullptr,
231233
nullptr,
232234
nullptr,
235+
nullptr,
233236
pDevice->getPreemptionMode(),
234237
false);
235238

@@ -261,6 +264,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterWorkDimensionswithSquaredLwsAlgorithm)
261264
nullptr,
262265
nullptr,
263266
nullptr,
267+
nullptr,
264268
pDevice->getPreemptionMode(),
265269
false);
266270
EXPECT_EQ(dimension, *kernel.workDim);
@@ -290,6 +294,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterWorkDimensionswithNDLwsAlgorithm) {
290294
nullptr,
291295
nullptr,
292296
nullptr,
297+
nullptr,
293298
pDevice->getPreemptionMode(),
294299
false);
295300
EXPECT_EQ(dimension, *kernel.workDim);
@@ -320,6 +325,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterWorkDimensionswithOldLwsAlgorithm) {
320325
nullptr,
321326
nullptr,
322327
nullptr,
328+
nullptr,
323329
pDevice->getPreemptionMode(),
324330
false);
325331
EXPECT_EQ(dimension, *kernel.workDim);
@@ -350,6 +356,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterNumWorkGroups) {
350356
nullptr,
351357
nullptr,
352358
nullptr,
359+
nullptr,
353360
pDevice->getPreemptionMode(),
354361
false);
355362

@@ -382,6 +389,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterNoLocalWorkSizeWithOutComputeND) {
382389
nullptr,
383390
nullptr,
384391
nullptr,
392+
nullptr,
385393
pDevice->getPreemptionMode(),
386394
false);
387395
EXPECT_EQ(2u, *kernel.localWorkSizeX);
@@ -413,6 +421,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterNoLocalWorkSizeWithComputeND) {
413421
nullptr,
414422
nullptr,
415423
nullptr,
424+
nullptr,
416425
pDevice->getPreemptionMode(),
417426
false);
418427
EXPECT_EQ(2u, *kernel.localWorkSizeX);
@@ -445,6 +454,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterNoLocalWorkSizeWithComputeSquared) {
445454
nullptr,
446455
nullptr,
447456
nullptr,
457+
nullptr,
448458
pDevice->getPreemptionMode(),
449459
false);
450460
EXPECT_EQ(2u, *kernel.localWorkSizeX);
@@ -477,6 +487,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterNoLocalWorkSizeWithOutComputeSquaredAn
477487
nullptr,
478488
nullptr,
479489
nullptr,
490+
nullptr,
480491
pDevice->getPreemptionMode(),
481492
false);
482493
EXPECT_EQ(2u, *kernel.localWorkSizeX);
@@ -507,6 +518,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterLocalWorkSize) {
507518
nullptr,
508519
nullptr,
509520
nullptr,
521+
nullptr,
510522
pDevice->getPreemptionMode(),
511523
false);
512524
EXPECT_EQ(1u, *kernel.localWorkSizeX);
@@ -540,6 +552,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterLocalWorkSizes) {
540552
nullptr,
541553
nullptr,
542554
nullptr,
555+
nullptr,
543556
pDevice->getPreemptionMode(),
544557
false);
545558
EXPECT_EQ(1u, *kernel.localWorkSizeX);
@@ -577,6 +590,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterLocalWorkSizeForSplitKernel) {
577590
nullptr,
578591
nullptr,
579592
nullptr,
593+
nullptr,
580594
pDevice->getPreemptionMode(),
581595
false);
582596

@@ -628,6 +642,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterLocalWorkSizesForSplitWalker) {
628642
nullptr,
629643
nullptr,
630644
nullptr,
645+
nullptr,
631646
pDevice->getPreemptionMode(),
632647
false);
633648

@@ -683,6 +698,7 @@ HWTEST_F(DispatchWalkerTest, dispatchWalkerDoesntConsumeCommandStreamWhenQueueIs
683698
nullptr,
684699
nullptr,
685700
nullptr,
701+
nullptr,
686702
pDevice->getPreemptionMode(),
687703
blockQueue);
688704

@@ -723,6 +739,7 @@ HWTEST_F(DispatchWalkerTest, dispatchWalkerShouldGetRequiredHeapSizesFromKernelW
723739
nullptr,
724740
nullptr,
725741
nullptr,
742+
nullptr,
726743
pDevice->getPreemptionMode(),
727744
blockQueue);
728745

@@ -761,6 +778,7 @@ HWTEST_F(DispatchWalkerTest, dispatchWalkerShouldGetRequiredHeapSizesFromMdiWhen
761778
nullptr,
762779
nullptr,
763780
nullptr,
781+
nullptr,
764782
pDevice->getPreemptionMode(),
765783
blockQueue);
766784

@@ -794,6 +812,7 @@ HWTEST_F(DispatchWalkerTest, dispatchWalkerWithMultipleDispatchInfo) {
794812
nullptr,
795813
nullptr,
796814
nullptr,
815+
nullptr,
797816
pDevice->getPreemptionMode(),
798817
false);
799818

@@ -836,6 +855,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, dispatchWalkerWithMultipleDispat
836855
nullptr,
837856
nullptr,
838857
nullptr,
858+
nullptr,
839859
pDevice->getPreemptionMode(),
840860
false);
841861

@@ -921,6 +941,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, dispatchWalkerWithMultipleDispat
921941
nullptr,
922942
nullptr,
923943
nullptr,
944+
nullptr,
924945
pDevice->getPreemptionMode(),
925946
false);
926947

@@ -967,6 +988,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, dispatchWalkerWithMultipleDispat
967988
nullptr,
968989
nullptr,
969990
nullptr,
991+
nullptr,
970992
pDevice->getPreemptionMode(),
971993
false);
972994

@@ -1018,6 +1040,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, dispatchWalkerWithMultipleDispat
10181040
nullptr,
10191041
nullptr,
10201042
nullptr,
1043+
nullptr,
10211044
pDevice->getPreemptionMode(),
10221045
false);
10231046

@@ -1061,7 +1084,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, givenMultiDispatchWhenWhiteliste
10611084
DispatchInfo di2(&kernel, 1, Vec3<size_t>(1, 1, 1), Vec3<size_t>(1, 1, 1), Vec3<size_t>(0, 0, 0));
10621085
MockMultiDispatchInfo multiDispatchInfo(std::vector<DispatchInfo *>({&di1, &di2}));
10631086

1064-
GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ, multiDispatchInfo, 0, nullptr, nullptr, nullptr, nullptr, nullptr, pDevice->getPreemptionMode(), false);
1087+
GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ, multiDispatchInfo, 0, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, pDevice->getPreemptionMode(), false);
10651088

10661089
hwParser.parseCommands<FamilyType>(cmdStream, 0);
10671090

unit_tests/execution_model/parent_kernel_dispatch_tests.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ HWTEST_P(ParentKernelDispatchTest, givenParentKernelWhenQueueIsNotBlockedThenDev
6565
nullptr,
6666
nullptr,
6767
nullptr,
68+
nullptr,
6869
pDevice->getPreemptionMode(),
6970
false);
7071

@@ -121,6 +122,7 @@ HWTEST_P(ParentKernelDispatchTest, givenParentKernelWhenQueueIsNotBlockedThenDef
121122
nullptr,
122123
nullptr,
123124
nullptr,
125+
nullptr,
124126
pDevice->getPreemptionMode(),
125127
false);
126128

@@ -147,6 +149,7 @@ HWTEST_P(ParentKernelDispatchTest, givenParentKernelWhenQueueIsNotBlockedThenSSH
147149
nullptr,
148150
nullptr,
149151
nullptr,
152+
nullptr,
150153
pDevice->getPreemptionMode(),
151154
false);
152155

@@ -183,6 +186,7 @@ HWTEST_P(ParentKernelDispatchTest, givenParentKernelWhenQueueIsBlockedThenSSHSiz
183186
nullptr,
184187
nullptr,
185188
nullptr,
189+
nullptr,
186190
pDevice->getPreemptionMode(),
187191
true);
188192
ASSERT_NE(nullptr, blockedCommandsData);
@@ -281,6 +285,7 @@ HWTEST_F(MockParentKernelDispatch, GivenBlockedQueueWhenParentKernelIsDispatched
281285
nullptr,
282286
nullptr,
283287
nullptr,
288+
nullptr,
284289
pDevice->getPreemptionMode(),
285290
true);
286291

@@ -315,6 +320,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, MockParentKernelDispatch, GivenParentKernelWhenDispa
315320
nullptr,
316321
nullptr,
317322
nullptr,
323+
nullptr,
318324
pDevice->getPreemptionMode(),
319325
false);
320326

@@ -372,6 +378,7 @@ HWTEST_F(MockParentKernelDispatch, GivenUsedSSHHeapWhenParentKernelIsDispatchedT
372378
nullptr,
373379
nullptr,
374380
nullptr,
381+
nullptr,
375382
pDevice->getPreemptionMode(),
376383
false);
377384

@@ -408,6 +415,7 @@ HWTEST_F(MockParentKernelDispatch, GivenNotUsedSSHHeapWhenParentKernelIsDispatch
408415
nullptr,
409416
nullptr,
410417
nullptr,
418+
nullptr,
411419
pDevice->getPreemptionMode(),
412420
false);
413421

unit_tests/execution_model/submit_blocked_parent_kernel_tests.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -450,6 +450,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenBlockedCommand
450450
nullptr,
451451
nullptr,
452452
nullptr,
453+
nullptr,
453454
device->getPreemptionMode(),
454455
true);
455456

0 commit comments

Comments
 (0)