Skip to content

Commit 14e338e

Browse files
Revert "Optimize timestamp packet dependencies"
This reverts commit c365b422963917e7b882f9db985969c036f0fa3f.

Signed-off-by: Lukasz Jobczyk <[email protected]>
1 parent eb97469 commit 14e338e

File tree

11 files changed: +39 additions, -529 deletions (lines changed)

opencl/source/command_queue/command_queue.cpp

Lines changed: 5 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -683,6 +683,7 @@ void CommandQueue::updateBcsTaskCount(aub_stream::EngineType bcsEngineType, uint
683683

684684
uint32_t CommandQueue::peekBcsTaskCount(aub_stream::EngineType bcsEngineType) const {
685685
const CopyEngineState &state = bcsStates[EngineHelpers::getBcsIndex(bcsEngineType)];
686+
DEBUG_BREAK_IF(!state.isValid());
686687
return state.taskCount;
687688
}
688689

@@ -707,6 +708,10 @@ void CommandQueue::obtainNewTimestampPacketNodes(size_t numberOfNodes, Timestamp
707708

708709
previousNodes.swapNodes(*timestampPacketContainer);
709710

711+
if ((previousNodes.peekNodes().size() > 0) && (previousNodes.peekNodes()[0]->getAllocator() != allocator)) {
712+
clearAllDependencies = false;
713+
}
714+
710715
if (clearAllDependencies) {
711716
previousNodes.moveNodesToNewContainer(*deferredTimestampPackets);
712717
}
@@ -1011,61 +1016,4 @@ void CommandQueue::waitForAllEngines(bool blockedQueue, PrintfHandler *printfHan
10111016
}
10121017
}
10131018

1014-
void CommandQueue::setupBarrierTimestampForBcsEngines(aub_stream::EngineType engineType, TimestampPacketDependencies &timestampPacketDependencies) {
1015-
if (!getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired()) {
1016-
return;
1017-
}
1018-
1019-
// Ensure we have exactly 1 barrier node.
1020-
if (timestampPacketDependencies.barrierNodes.peekNodes().empty()) {
1021-
timestampPacketDependencies.barrierNodes.add(getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag());
1022-
}
1023-
1024-
if (isOOQEnabled()) {
1025-
// Barrier node will be signalled on gpgpuCsr. Save it for later use on blitters.
1026-
for (auto currentBcsIndex = 0u; currentBcsIndex < bcsTimestampPacketContainers.size(); currentBcsIndex++) {
1027-
const auto currentBcsEngineType = EngineHelpers::mapBcsIndexToEngineType(currentBcsIndex, true);
1028-
if (currentBcsEngineType == engineType) {
1029-
// Node is already added to barrierNodes for this engine, no need to save it.
1030-
continue;
1031-
}
1032-
1033-
// Save latest timestamp (override previous, if any).
1034-
TimestampPacketContainer newContainer{};
1035-
newContainer.assignAndIncrementNodesRefCounts(timestampPacketDependencies.barrierNodes);
1036-
bcsTimestampPacketContainers[currentBcsIndex].lastBarrierToWaitFor.swapNodes(newContainer);
1037-
}
1038-
}
1039-
}
1040-
1041-
void CommandQueue::processBarrierTimestampForBcsEngine(aub_stream::EngineType bcsEngineType, TimestampPacketDependencies &blitDependencies) {
1042-
BcsTimestampPacketContainers &bcsContainers = bcsTimestampPacketContainers[EngineHelpers::getBcsIndex(bcsEngineType)];
1043-
bcsContainers.lastBarrierToWaitFor.moveNodesToNewContainer(blitDependencies.barrierNodes);
1044-
}
1045-
1046-
void CommandQueue::setLastBcsPacket(aub_stream::EngineType bcsEngineType) {
1047-
if (isOOQEnabled()) {
1048-
TimestampPacketContainer dummyContainer{};
1049-
dummyContainer.assignAndIncrementNodesRefCounts(*this->timestampPacketContainer);
1050-
1051-
BcsTimestampPacketContainers &bcsContainers = bcsTimestampPacketContainers[EngineHelpers::getBcsIndex(bcsEngineType)];
1052-
bcsContainers.lastSignalledPacket.swapNodes(dummyContainer);
1053-
}
1054-
}
1055-
1056-
void CommandQueue::fillCsrDependenciesWithLastBcsPackets(CsrDependencies &csrDeps) {
1057-
for (BcsTimestampPacketContainers &bcsContainers : bcsTimestampPacketContainers) {
1058-
if (bcsContainers.lastSignalledPacket.peekNodes().empty()) {
1059-
continue;
1060-
}
1061-
csrDeps.timestampPacketContainer.push_back(&bcsContainers.lastSignalledPacket);
1062-
}
1063-
}
1064-
1065-
void CommandQueue::clearLastBcsPackets() {
1066-
for (BcsTimestampPacketContainers &bcsContainers : bcsTimestampPacketContainers) {
1067-
bcsContainers.lastSignalledPacket.moveNodesToNewContainer(*deferredTimestampPackets);
1068-
}
1069-
}
1070-
10711019
} // namespace NEO

opencl/source/command_queue/command_queue.h

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -324,12 +324,6 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
324324
void updateLatestSentEnqueueType(EnqueueProperties::Operation newEnqueueType) { this->latestSentEnqueueType = newEnqueueType; }
325325
EnqueueProperties::Operation peekLatestSentEnqueueOperation() { return this->latestSentEnqueueType; }
326326

327-
void setupBarrierTimestampForBcsEngines(aub_stream::EngineType engineType, TimestampPacketDependencies &timestampPacketDependencies);
328-
void processBarrierTimestampForBcsEngine(aub_stream::EngineType bcsEngineType, TimestampPacketDependencies &blitDependencies);
329-
void setLastBcsPacket(aub_stream::EngineType bcsEngineType);
330-
void fillCsrDependenciesWithLastBcsPackets(CsrDependencies &csrDeps);
331-
void clearLastBcsPackets();
332-
333327
// taskCount of last task
334328
uint32_t taskCount = 0;
335329

@@ -415,11 +409,6 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
415409

416410
std::unique_ptr<TimestampPacketContainer> deferredTimestampPackets;
417411
std::unique_ptr<TimestampPacketContainer> timestampPacketContainer;
418-
struct BcsTimestampPacketContainers {
419-
TimestampPacketContainer lastBarrierToWaitFor;
420-
TimestampPacketContainer lastSignalledPacket;
421-
};
422-
std::array<BcsTimestampPacketContainers, bcsInfoMaskSize> bcsTimestampPacketContainers;
423412
};
424413

425414
template <typename PtrType>

opencl/source/command_queue/enqueue_common.h

Lines changed: 4 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -248,10 +248,6 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
248248
timestampPacketDependencies, eventsRequest, blockQueue);
249249
}
250250

251-
if (!blockQueue && isOOQEnabled()) {
252-
setupBarrierTimestampForBcsEngines(computeCommandStreamReceiver.getOsContext().getEngineType(), timestampPacketDependencies);
253-
}
254-
255251
if (eventBuilder.getEvent() && computeCommandStreamReceiver.peekTimestampPacketWriteEnabled()) {
256252
eventBuilder.getEvent()->addTimestampPacketNodes(*timestampPacketContainer);
257253
eventBuilder.getEvent()->addTimestampPacketNodes(timestampPacketDependencies.nonAuxToAuxNodes);
@@ -540,6 +536,8 @@ BlitProperties CommandQueueHw<GfxFamily>::processDispatchForBlitEnqueue(CommandS
540536
hwInfo,
541537
args);
542538
}
539+
540+
TimestampPacketHelper::programSemaphore<GfxFamily>(*commandStream, *currentTimestampPacketNode);
543541
}
544542
return blitProperties;
545543
}
@@ -895,13 +893,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
895893
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = mediaSamplerRequired;
896894
dispatchFlags.pipelineSelectArgs.specialPipelineSelectMode = specialPipelineSelectMode;
897895

898-
const bool isHandlingBarrier = getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired();
899-
900896
if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled() && !clearDependenciesForSubCapture) {
901897
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OutOfCsr);
902-
if (isHandlingBarrier) {
903-
fillCsrDependenciesWithLastBcsPackets(dispatchFlags.csrDependencies);
904-
}
905898
dispatchFlags.csrDependencies.makeResident(getGpgpuCommandStreamReceiver());
906899
}
907900

@@ -939,10 +932,6 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
939932
dispatchFlags,
940933
getDevice());
941934

942-
if (isHandlingBarrier) {
943-
clearLastBcsPackets();
944-
}
945-
946935
if (gtpinIsGTPinInitialized()) {
947936
gtpinNotifyFlushTask(completionStamp.taskCount);
948937
}
@@ -1125,13 +1114,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
11251114
false, //memoryMigrationRequired
11261115
false); //textureCacheFlush
11271116

1128-
const bool isHandlingBarrier = getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired();
1129-
11301117
if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
11311118
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OutOfCsr);
1132-
if (isHandlingBarrier) {
1133-
fillCsrDependenciesWithLastBcsPackets(dispatchFlags.csrDependencies);
1134-
}
11351119
dispatchFlags.csrDependencies.makeResident(getGpgpuCommandStreamReceiver());
11361120
}
11371121

@@ -1144,10 +1128,6 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
11441128
taskLevel,
11451129
dispatchFlags,
11461130
getDevice());
1147-
1148-
if (isHandlingBarrier) {
1149-
clearLastBcsPackets();
1150-
}
11511131
}
11521132

11531133
if (enqueueProperties.operation == EnqueueProperties::Operation::Blit) {
@@ -1223,10 +1203,9 @@ void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispat
12231203
timestampPacketDependencies.cacheFlushNodes.add(allocator->getTag());
12241204
}
12251205

1226-
if (!blockQueue) {
1227-
setupBarrierTimestampForBcsEngines(bcsCsr.getOsContext().getEngineType(), timestampPacketDependencies);
1206+
if (!blockQueue && getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired()) {
1207+
timestampPacketDependencies.barrierNodes.add(allocator->getTag());
12281208
}
1229-
processBarrierTimestampForBcsEngine(bcsCsr.getOsContext().getEngineType(), timestampPacketDependencies);
12301209

12311210
obtainNewTimestampPacketNodes(1, timestampPacketDependencies.previousEnqueueNodes, clearAllDependencies, bcsCsr);
12321211
csrDeps.timestampPacketContainer.push_back(&timestampPacketDependencies.previousEnqueueNodes);
@@ -1259,8 +1238,6 @@ void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispat
12591238
}
12601239

12611240
this->latestSentEnqueueType = enqueueProperties.operation;
1262-
1263-
setLastBcsPacket(bcsCsr.getOsContext().getEngineType());
12641241
}
12651242
updateFromCompletionStamp(completionStamp, eventBuilder.getEvent());
12661243

opencl/source/helpers/task_information.cpp

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -217,10 +217,6 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
217217
commandQueue.getGpgpuCommandStreamReceiver(), *bcsCsrForAuxTranslation);
218218
}
219219

220-
if (timestampPacketDependencies && commandQueue.isOOQEnabled()) {
221-
commandQueue.setupBarrierTimestampForBcsEngines(commandQueue.getGpgpuCommandStreamReceiver().getOsContext().getEngineType(), *timestampPacketDependencies);
222-
}
223-
224220
const auto &kernelDescriptor = kernel->getKernelInfo().kernelDescriptor;
225221

226222
auto memoryCompressionState = commandStreamReceiver.getMemoryCompressionState(kernel->isAuxTranslationRequired(), commandQueue.getDevice().getHardwareInfo());
@@ -260,13 +256,8 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
260256
eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver);
261257
}
262258

263-
const bool isHandlingBarrier = commandQueue.getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired();
264-
265259
if (timestampPacketDependencies) {
266260
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, commandStreamReceiver, CsrDependencies::DependenciesType::OutOfCsr);
267-
if (isHandlingBarrier) {
268-
commandQueue.fillCsrDependenciesWithLastBcsPackets(dispatchFlags.csrDependencies);
269-
}
270261
dispatchFlags.barrierTimestampPacketNodes = &timestampPacketDependencies->barrierNodes;
271262
}
272263
dispatchFlags.pipelineSelectArgs.specialPipelineSelectMode = kernel->requiresSpecialPipelineSelectMode();
@@ -300,10 +291,6 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
300291
dispatchFlags,
301292
commandQueue.getDevice());
302293

303-
if (isHandlingBarrier) {
304-
commandQueue.clearLastBcsPackets();
305-
}
306-
307294
if (kernelOperation->blitPropertiesContainer.size() > 0) {
308295
const auto newTaskCount = bcsCsrForAuxTranslation->blitBuffer(kernelOperation->blitPropertiesContainer, false, commandQueue.isProfilingEnabled(), commandQueue.getDevice());
309296
commandQueue.updateBcsTaskCount(bcsCsrForAuxTranslation->getOsContext().getEngineType(), newTaskCount);
@@ -345,7 +332,6 @@ void CommandWithoutKernel::dispatchBlitOperation() {
345332

346333
const auto newTaskCount = bcsCsr->blitBuffer(kernelOperation->blitPropertiesContainer, false, commandQueue.isProfilingEnabled(), commandQueue.getDevice());
347334
commandQueue.updateBcsTaskCount(bcsCsr->getOsContext().getEngineType(), newTaskCount);
348-
commandQueue.setLastBcsPacket(bcsCsr->getOsContext().getEngineType());
349335
}
350336

351337
CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminated) {
@@ -378,10 +364,6 @@ CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminate
378364
}
379365
}
380366

381-
if (timestampPacketDependencies && commandQueue.isOOQEnabled()) {
382-
commandQueue.setupBarrierTimestampForBcsEngines(commandQueue.getGpgpuCommandStreamReceiver().getOsContext().getEngineType(), *timestampPacketDependencies);
383-
}
384-
385367
auto rootDeviceIndex = commandStreamReceiver.getRootDeviceIndex();
386368
DispatchFlags dispatchFlags(
387369
{}, //csrDependencies
@@ -418,13 +400,8 @@ CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminate
418400
eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver);
419401
}
420402

421-
const bool isHandlingBarrier = commandQueue.getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired();
422-
423403
if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) {
424404
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, commandStreamReceiver, CsrDependencies::DependenciesType::OutOfCsr);
425-
if (isHandlingBarrier) {
426-
commandQueue.fillCsrDependenciesWithLastBcsPackets(dispatchFlags.csrDependencies);
427-
}
428405
makeTimestampPacketsResident(commandStreamReceiver);
429406
}
430407

@@ -439,10 +416,6 @@ CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminate
439416
dispatchFlags,
440417
commandQueue.getDevice());
441418

442-
if (isHandlingBarrier) {
443-
commandQueue.clearLastBcsPackets();
444-
}
445-
446419
if (kernelOperation->blitEnqueue) {
447420
dispatchBlitOperation();
448421
}

opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -233,12 +233,6 @@ struct BlitEnqueueTests : public ::testing::Test {
233233
return commandItor;
234234
}
235235

236-
template <typename Command>
237-
void expectNoCommand(GenCmdList::iterator itorStart, GenCmdList::iterator itorEnd) {
238-
auto commandItor = find<Command *>(itorStart, itorEnd);
239-
EXPECT_TRUE(commandItor == itorEnd);
240-
}
241-
242236
template <typename Family>
243237
void verifySemaphore(GenCmdList::iterator &semaphoreItor, uint64_t expectedAddress) {
244238
using MI_SEMAPHORE_WAIT = typename Family::MI_SEMAPHORE_WAIT;
@@ -1031,10 +1025,13 @@ HWTEST_TEMPLATED_F(BlitEnqueueWithNoTimestampPacketTests, givenNoTimestampPacket
10311025
auto cmdFound = expectCommand<MI_SEMAPHORE_WAIT>(bcsCommands.begin(), bcsCommands.end());
10321026

10331027
cmdFound = expectMiFlush<MI_FLUSH_DW>(cmdFound++, bcsCommands.end());
1028+
auto miflushDwCmd = genCmdCast<MI_FLUSH_DW *>(*cmdFound);
1029+
const auto bcsSignalAddress = miflushDwCmd->getDestinationAddress();
10341030

10351031
cmdFound = expectCommand<WALKER_TYPE>(ccsCommands.begin(), ccsCommands.end());
10361032

1037-
expectNoCommand<MI_SEMAPHORE_WAIT>(cmdFound++, ccsCommands.end());
1033+
cmdFound = expectCommand<MI_SEMAPHORE_WAIT>(cmdFound++, ccsCommands.end());
1034+
verifySemaphore<FamilyType>(cmdFound, bcsSignalAddress);
10381035
}
10391036

10401037
struct BlitEnqueueWithDebugCapabilityTests : public BlitEnqueueTests<0> {
@@ -1789,7 +1786,7 @@ HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushR
17891786
}
17901787
}
17911788

1792-
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenSubmissionToDifferentEngineWhenRequestingForNewTimestmapPacketThenClearDependencies) {
1789+
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenSubmissionToDifferentEngineWhenRequestingForNewTimestmapPacketThenDontClearDependencies) {
17931790
auto mockCommandQueue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
17941791
const bool clearDependencies = true;
17951792

@@ -1799,6 +1796,12 @@ HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenSubmissionT
17991796
EXPECT_EQ(0u, previousNodes.peekNodes().size());
18001797
}
18011798

1799+
{
1800+
TimestampPacketContainer previousNodes;
1801+
mockCommandQueue->obtainNewTimestampPacketNodes(1, previousNodes, clearDependencies, *bcsCsr);
1802+
EXPECT_EQ(1u, previousNodes.peekNodes().size());
1803+
}
1804+
18021805
{
18031806
TimestampPacketContainer previousNodes;
18041807
mockCommandQueue->obtainNewTimestampPacketNodes(1, previousNodes, clearDependencies, *bcsCsr);

0 commit comments

Comments
 (0)