Skip to content

Commit 51421ec

Browse files
Add experimental command buffer
This code provides infrastructure for a special debug purpose: it allows measuring the execution time of any hardware command. Change-Id: Id12a7979d204734a0c4a6c4700e427b65ac2397f
1 parent e34c472 commit 51421ec

28 files changed

+1238
-345
lines changed

runtime/command_stream/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ set(RUNTIME_SRCS_COMMAND_STREAM
3636
${CMAKE_CURRENT_SOURCE_DIR}/create_command_stream_impl.h
3737
${CMAKE_CURRENT_SOURCE_DIR}/csr_definitions.h
3838
${CMAKE_CURRENT_SOURCE_DIR}/device_command_stream.h
39+
${CMAKE_CURRENT_SOURCE_DIR}/experimental_command_buffer.cpp
40+
${CMAKE_CURRENT_SOURCE_DIR}/experimental_command_buffer.h
41+
${CMAKE_CURRENT_SOURCE_DIR}/experimental_command_buffer.inl
3942
${CMAKE_CURRENT_SOURCE_DIR}/linear_stream.cpp
4043
${CMAKE_CURRENT_SOURCE_DIR}/linear_stream.h
4144
${CMAKE_CURRENT_SOURCE_DIR}/submissions_aggregator.cpp

runtime/command_stream/command_stream_receiver.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222

2323
#include "runtime/built_ins/built_ins.h"
2424
#include "runtime/command_stream/command_stream_receiver.h"
25+
#include "runtime/command_stream/experimental_command_buffer.h"
2526
#include "runtime/command_stream/preemption.h"
2627
#include "runtime/device/device.h"
2728
#include "runtime/gtpin/gtpin_notify.h"
@@ -203,6 +204,7 @@ void CommandStreamReceiver::cleanupResources() {
203204
commandStream.replaceGraphicsAllocation(nullptr);
204205
commandStream.replaceBuffer(nullptr, 0);
205206
}
207+
experimentalCmdBuffer.reset(nullptr);
206208
}
207209

208210
bool CommandStreamReceiver::waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) {
@@ -338,4 +340,8 @@ void CommandStreamReceiver::releaseIndirectHeap(IndirectHeap::Type heapType) {
338340
}
339341
}
340342

343+
// Installs an experimental command buffer on this command stream receiver.
// Ownership of cmdBuffer is transferred by move-assignment into the
// experimentalCmdBuffer member; because that member is a std::unique_ptr,
// any buffer it held before the call is destroyed by the assignment.
// Passing an empty pointer therefore removes the current buffer.
void CommandStreamReceiver::setExperimentalCmdBuffer(std::unique_ptr<ExperimentalCommandBuffer> &&cmdBuffer) {
344+
experimentalCmdBuffer = std::move(cmdBuffer);
345+
}
346+
341347
} // namespace OCLRT

runtime/command_stream/command_stream_receiver.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,12 @@
3737
namespace OCLRT {
3838
class Device;
3939
class EventBuilder;
40-
class LinearStream;
40+
class ExperimentalCommandBuffer;
41+
class GraphicsAllocation;
4142
class IndirectHeap;
43+
class LinearStream;
4244
class MemoryManager;
4345
class OSInterface;
44-
class GraphicsAllocation;
4546

4647
enum class DispatchMode {
4748
DeviceDefault = 0, //default for given device
@@ -139,6 +140,7 @@ class CommandStreamReceiver {
139140
void releaseIndirectHeap(IndirectHeap::Type heapType);
140141

141142
virtual enum CommandStreamReceiverType getType() = 0;
143+
void setExperimentalCmdBuffer(std::unique_ptr<ExperimentalCommandBuffer> &&cmdBuffer);
142144

143145
protected:
144146
void setDisableL3Cache(bool val) {
@@ -190,6 +192,7 @@ class CommandStreamReceiver {
190192
SamplerCacheFlushState samplerCacheFlushRequired = SamplerCacheFlushState::samplerCacheFlushNotRequired;
191193
IndirectHeap *indirectHeap[IndirectHeap::NUM_TYPES];
192194
std::unique_ptr<FlatBatchBufferHelper> flatBatchBufferHelper;
195+
std::unique_ptr<ExperimentalCommandBuffer> experimentalCmdBuffer;
193196
};
194197

195198
typedef CommandStreamReceiver *(*CommandStreamReceiverCreateFunc)(const HardwareInfo &hwInfoIn, bool withAubDump);

runtime/command_stream/command_stream_receiver_hw.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
5555
int getRequiredPipeControlSize() const;
5656

5757
static void addBatchBufferEnd(LinearStream &commandStream, void **patchLocation);
58-
void addBatchBufferStart(MI_BATCH_BUFFER_START *commandBufferMemory, uint64_t startAddress);
58+
void addBatchBufferStart(MI_BATCH_BUFFER_START *commandBufferMemory, uint64_t startAddress, bool secondary);
5959
static void alignToCacheLine(LinearStream &commandStream);
6060

6161
size_t getRequiredCmdStreamSize(const DispatchFlags &dispatchFlags);

runtime/command_stream/command_stream_receiver_hw.inl

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
*/
2222

2323
#include "runtime/command_stream/command_stream_receiver_hw.h"
24+
#include "runtime/command_stream/experimental_command_buffer.h"
2425
#include "runtime/command_stream/linear_stream.h"
2526
#include "runtime/device/device.h"
2627
#include "runtime/gtpin/gtpin_notify.h"
@@ -62,10 +63,13 @@ inline void CommandStreamReceiverHw<GfxFamily>::addBatchBufferEnd(LinearStream &
6263
}
6364

6465
template <typename GfxFamily>
65-
inline void CommandStreamReceiverHw<GfxFamily>::addBatchBufferStart(MI_BATCH_BUFFER_START *commandBufferMemory, uint64_t startAddress) {
66+
inline void CommandStreamReceiverHw<GfxFamily>::addBatchBufferStart(MI_BATCH_BUFFER_START *commandBufferMemory, uint64_t startAddress, bool secondary) {
6667
*commandBufferMemory = GfxFamily::cmdInitBatchBufferStart;
6768
commandBufferMemory->setBatchBufferStartAddressGraphicsaddress472(startAddress);
6869
commandBufferMemory->setAddressSpaceIndicator(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT);
70+
if (secondary) {
71+
commandBufferMemory->setSecondLevelBatchBuffer(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH);
72+
}
6973
if (DebugManager.flags.FlattenBatchBufferForAUBDump.get()) {
7074
flatBatchBufferHelper->registerBatchBufferStartAddress(reinterpret_cast<uint64_t>(commandBufferMemory), startAddress);
7175
}
@@ -328,6 +332,12 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
328332
}
329333
}
330334
}
335+
336+
if (experimentalCmdBuffer.get() != nullptr) {
337+
size_t startingOffset = experimentalCmdBuffer->programExperimentalCommandBuffer<GfxFamily>();
338+
experimentalCmdBuffer->injectBufferStart<GfxFamily>(commandStreamCSR, startingOffset);
339+
}
340+
331341
// Add a PC if we have a dependency on a previous walker to avoid concurrency issues.
332342
if (taskLevel > this->taskLevel) {
333343
addPipeControl(commandStreamCSR, false);
@@ -358,6 +368,10 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
358368
makeResident(*BuiltIns::getInstance().getSipKernel(sipType, *device).getSipAllocation());
359369
}
360370

371+
if (experimentalCmdBuffer.get() != nullptr) {
372+
experimentalCmdBuffer->makeResidentAllocations();
373+
}
374+
361375
// If the CSR has work in its CS, flush it before the task
362376
bool submitTask = commandStreamStartTask != commandStreamTask.getUsed();
363377
bool submitCSR = commandStreamStartCSR != commandStreamCSR.getUsed();
@@ -377,7 +391,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
377391
chainedBatchBuffer = commandStreamTask.getGraphicsAllocation();
378392
// Add MI_BATCH_BUFFER_START to chain from CSR -> Task
379393
auto pBBS = reinterpret_cast<MI_BATCH_BUFFER_START *>(commandStreamCSR.getSpace(sizeof(MI_BATCH_BUFFER_START)));
380-
addBatchBufferStart(pBBS, ptrOffset(commandStreamTask.getGraphicsAllocation()->getGpuAddress(), commandStreamStartTask));
394+
addBatchBufferStart(pBBS, ptrOffset(commandStreamTask.getGraphicsAllocation()->getGpuAddress(), commandStreamStartTask), false);
381395
if (DebugManager.flags.FlattenBatchBufferForAUBDump.get()) {
382396
flatBatchBufferHelper->registerCommandChunk(commandStreamTask.getGraphicsAllocation()->getGpuAddress(),
383397
reinterpret_cast<uint64_t>(commandStreamTask.getCpuBase()),
@@ -510,7 +524,7 @@ inline void CommandStreamReceiverHw<GfxFamily>::flushBatchedSubmissions() {
510524
flushStampUpdateHelper.insert(nextCommandBuffer->flushStamp->getStampReference());
511525
auto nextCommandBufferAddress = nextCommandBuffer->batchBuffer.commandBufferAllocation->getUnderlyingBuffer();
512526
auto offsetedCommandBuffer = (uint64_t)ptrOffset(nextCommandBufferAddress, nextCommandBuffer->batchBuffer.startOffset);
513-
addBatchBufferStart((MI_BATCH_BUFFER_START *)currentBBendLocation, offsetedCommandBuffer);
527+
addBatchBufferStart((MI_BATCH_BUFFER_START *)currentBBendLocation, offsetedCommandBuffer, false);
514528
if (DebugManager.flags.FlattenBatchBufferForAUBDump.get()) {
515529
flatBatchBufferHelper->registerCommandChunk(nextCommandBuffer->batchBuffer, sizeof(MI_BATCH_BUFFER_START));
516530
}
@@ -610,6 +624,9 @@ size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamSize(const Dispat
610624
size += sizeof(typename GfxFamily::PIPE_CONTROL);
611625
}
612626
}
627+
if (experimentalCmdBuffer.get() != nullptr) {
628+
size += experimentalCmdBuffer->getRequiredInjectionSize<GfxFamily>();
629+
}
613630
return size;
614631
}
615632

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
/*
2+
* Copyright (c) 2018, Intel Corporation
3+
*
4+
* Permission is hereby granted, free of charge, to any person obtaining a
5+
* copy of this software and associated documentation files (the "Software"),
6+
* to deal in the Software without restriction, including without limitation
7+
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8+
* and/or sell copies of the Software, and to permit persons to whom the
9+
* Software is furnished to do so, subject to the following conditions:
10+
*
11+
* The above copyright notice and this permission notice shall be included
12+
* in all copies or substantial portions of the Software.
13+
*
14+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15+
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17+
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18+
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19+
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20+
* OTHER DEALINGS IN THE SOFTWARE.
21+
*/
22+
23+
#include "runtime/command_stream/command_stream_receiver.h"
24+
#include "runtime/command_stream/experimental_command_buffer.h"
25+
#include "runtime/command_stream/linear_stream.h"
26+
#include "runtime/device/device.h"
27+
#include "runtime/memory_manager/memory_constants.h"
28+
#include "runtime/memory_manager/memory_manager.h"
29+
#include <cstring>
30+
#include <type_traits>
31+
32+
namespace OCLRT {
33+
34+
// Builds an experimental command buffer bound to the given command stream receiver.
//
// The constructor stores csr, starts with no command stream (currentStream is
// null), zeroes both write offsets and enables printing (defaultPrint = true).
// It then asks csr's memory manager for two allocations of
// MemoryConstants::pageSize bytes each (timestamps and experimentalAllocation),
// zero-fills both, and caches the device's profilingTimerResolution.
//
// NOTE(review): csr, csr->getMemoryManager(), the memory manager's device and
// both allocateGraphicsMemory() results are dereferenced without a null check,
// while the destructor does null-check the memory manager - confirm that
// callers guarantee all of these are valid.
ExperimentalCommandBuffer::ExperimentalCommandBuffer(CommandStreamReceiver *csr) : commandStreamReceiver(csr),
35+
currentStream(nullptr),
36+
timestampsOffset(0),
37+
experimentalAllocationOffset(0),
38+
defaultPrint(true) {
39+
timestamps = csr->getMemoryManager()->allocateGraphicsMemory(MemoryConstants::pageSize);
40+
memset(timestamps->getUnderlyingBuffer(), 0, timestamps->getUnderlyingBufferSize());
41+
experimentalAllocation = csr->getMemoryManager()->allocateGraphicsMemory(MemoryConstants::pageSize);
42+
memset(experimentalAllocation->getUnderlyingBuffer(), 0, experimentalAllocation->getUnderlyingBufferSize());
43+
timerResolution = commandStreamReceiver->getMemoryManager()->device->getDeviceInfo().profilingTimerResolution;
44+
}
45+
46+
// Reports the collected timestamps and releases the resources held by this object.
//
// The timestamps buffer is read as consecutive pairs of uint64_t values, the
// first of each pair being the start and the second the stop. The number of
// pairs is timestampsOffset / (2 * sizeof(uint64_t)). Each value is multiplied
// by timerResolution, and the start, stop and their difference are passed to
// printDebugString together with defaultPrint and stdout.
// Afterwards, if the command stream receiver still has a memory manager, both
// page allocations are freed and the command stream's allocation (if a stream
// was ever created) is handed to storeAllocation tagged REUSABLE_ALLOCATION.
//
// NOTE(review): printDebugString is defined outside this view; presumably it
// prints only when its first argument is true - confirm.
// NOTE(review): the unit of profilingTimerResolution is not visible here;
// presumably nanoseconds per tick - confirm.
// NOTE(review): timestamps is dereferenced before any null check, so a failed
// allocation in the constructor would crash here.
ExperimentalCommandBuffer::~ExperimentalCommandBuffer() {
47+
auto timestamp = static_cast<uint64_t *>(timestamps->getUnderlyingBuffer());
48+
for (uint32_t i = 0; i < timestampsOffset / (2 * sizeof(uint64_t)); i++) {
49+
auto stop = static_cast<uint64_t>(*(timestamp + 1) * timerResolution);
50+
auto start = static_cast<uint64_t>(*timestamp * timerResolution);
51+
auto delta = stop - start;
// NOTE(review): "%llu" is paired with uint64_t arguments; where uint64_t is
// unsigned long this is a format/argument type mismatch - consider a cast to
// unsigned long long or PRIu64, assuming printDebugString forwards to a
// printf-style function.
52+
printDebugString(defaultPrint, stdout, "#%u: delta %llu start %llu stop %llu\n", i, delta, start, stop);
// Step to the next (start, stop) pair.
53+
timestamp += 2;
54+
}
55+
MemoryManager *memManager = commandStreamReceiver->getMemoryManager();
56+
if (memManager) {
57+
memManager->freeGraphicsMemory(timestamps);
58+
memManager->freeGraphicsMemory(experimentalAllocation);
59+
60+
if (currentStream.get()) {
// The stream's allocation is wrapped in a unique_ptr and given to the memory
// manager; the stream's pointer is then cleared so it no longer refers to it.
61+
memManager->storeAllocation(std::unique_ptr<GraphicsAllocation>(currentStream->getGraphicsAllocation()), REUSABLE_ALLOCATION);
62+
currentStream->replaceGraphicsAllocation(nullptr);
63+
}
64+
}
65+
}
66+
67+
// Ensures currentStream exists and has room for minRequiredSize bytes plus
// CSRequirements::minCommandQueueCommandStreamSize.
//
// A LinearStream is created on first use. If the stream's available space is
// below the padded requirement, a new backing allocation is obtained: the
// padded size is aligned up to MemoryConstants::pageSize, and
// CSRequirements::csOverfetchSize is added on top for the allocation request.
// A reusable allocation is tried first, with a fresh allocation as fallback.
// The previous allocation, if any, is passed to storeAllocation tagged
// REUSABLE_ALLOCATION. The stream is then pointed at the new buffer, with a
// usable size equal to the aligned size minus minCommandQueueCommandStreamSize.
//
// NOTE(review): replaceBuffer is defined outside this view; presumably it
// resets the stream's used counter, discarding the old contents - confirm.
void ExperimentalCommandBuffer::getCS(size_t minRequiredSize) {
68+
if (!currentStream) {
69+
currentStream.reset(new LinearStream(nullptr));
70+
}
71+
minRequiredSize += CSRequirements::minCommandQueueCommandStreamSize;
72+
if (currentStream->getAvailableSpace() < minRequiredSize) {
73+
MemoryManager *memManager = commandStreamReceiver->getMemoryManager();
74+
// Not enough room in the current stream: allocate a new block, rounded up to full pages.
75+
minRequiredSize = alignUp(minRequiredSize, MemoryConstants::pageSize);
76+
77+
auto requiredSize = minRequiredSize + CSRequirements::csOverfetchSize;
78+
79+
GraphicsAllocation *allocation = memManager->obtainReusableAllocation(requiredSize, false).release();
80+
if (!allocation) {
81+
allocation = memManager->allocateGraphicsMemory(requiredSize);
82+
}
// NOTE(review): allocation is dereferenced without a null check; if
// allocateGraphicsMemory can return nullptr this crashes - confirm.
83+
allocation->setAllocationType(GraphicsAllocation::AllocationType::LINEAR_STREAM);
84+
// Hand the old block, if there is one, to the memory manager as a reusable allocation.
85+
auto oldAllocation = currentStream->getGraphicsAllocation();
86+
if (oldAllocation) {
87+
memManager->storeAllocation(std::unique_ptr<GraphicsAllocation>(oldAllocation), REUSABLE_ALLOCATION);
88+
}
89+
currentStream->replaceBuffer(allocation->getUnderlyingBuffer(), minRequiredSize - CSRequirements::minCommandQueueCommandStreamSize);
90+
currentStream->replaceGraphicsAllocation(allocation);
91+
}
92+
}
93+
94+
// Asks the owning command stream receiver to make resident the three
// allocations this object uses: the command stream's backing allocation, the
// timestamps allocation and the experimental allocation.
//
// NOTE(review): currentStream and its graphics allocation are dereferenced
// without a null check. currentStream is only created inside getCS(), so a
// call made before any stream exists would dereference a null pointer -
// confirm that callers always program the buffer first.
void ExperimentalCommandBuffer::makeResidentAllocations() {
95+
commandStreamReceiver->makeResident(*currentStream->getGraphicsAllocation());
96+
commandStreamReceiver->makeResident(*timestamps);
97+
commandStreamReceiver->makeResident(*experimentalAllocation);
98+
}
99+
100+
} // namespace OCLRT
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
/*
2+
* Copyright (c) 2018, Intel Corporation
3+
*
4+
* Permission is hereby granted, free of charge, to any person obtaining a
5+
* copy of this software and associated documentation files (the "Software"),
6+
* to deal in the Software without restriction, including without limitation
7+
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8+
* and/or sell copies of the Software, and to permit persons to whom the
9+
* Software is furnished to do so, subject to the following conditions:
10+
*
11+
* The above copyright notice and this permission notice shall be included
12+
* in all copies or substantial portions of the Software.
13+
*
14+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15+
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17+
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18+
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19+
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20+
* OTHER DEALINGS IN THE SOFTWARE.
21+
*/
22+
23+
#pragma once
24+
#include <memory>
25+
#include <stdint.h>
26+
27+
namespace OCLRT {
28+
29+
class CommandStreamReceiver;
30+
class GraphicsAllocation;
31+
class LinearStream;
32+
class MemoryManager;
33+
34+
// Debug-only helper attached to a CommandStreamReceiver. It keeps its own
// command stream plus two graphics allocations (timestamps and an
// experimental scratch allocation) and, per the accompanying commit message,
// exists to measure the execution time of hardware commands.
//
// The template member functions are only declared here; presumably they are
// defined in experimental_command_buffer.inl - confirm.
class ExperimentalCommandBuffer {
35+
public:
36+
virtual ~ExperimentalCommandBuffer();
// NOTE(review): single-argument constructor is not marked explicit, so a
// CommandStreamReceiver* converts implicitly - confirm this is intended.
37+
ExperimentalCommandBuffer(CommandStreamReceiver *csr);
38+
39+
template <typename GfxFamily>
40+
void injectBufferStart(LinearStream &parentStream, size_t cmdBufferOffset);
41+
42+
template <typename GfxFamily>
43+
size_t getRequiredInjectionSize() noexcept;
44+
45+
template <typename GfxFamily>
46+
size_t programExperimentalCommandBuffer();
47+
// Makes the stream, timestamps and experimental allocations resident through the owning CSR.
48+
void makeResidentAllocations();
49+
50+
protected:
51+
template <typename GfxFamily>
52+
size_t getTotalExperimentalSize() noexcept;
53+
// Ensures currentStream exists and has at least minRequiredSize bytes (plus a reserve) available.
54+
void getCS(size_t minRequiredSize);
55+
56+
template <typename GfxFamily>
57+
void addTimeStampPipeControl();
58+
59+
template <typename GfxFamily>
60+
size_t getTimeStampPipeControlSize() noexcept;
61+
62+
template <typename GfxFamily>
63+
void addExperimentalCommands();
64+
65+
template <typename GfxFamily>
66+
size_t getExperimentalCommandsSize() noexcept;
67+
// Non-owning pointer to the receiver this buffer is attached to; its memory manager is used for all allocations.
68+
CommandStreamReceiver *commandStreamReceiver;
// Command stream owned by this object; null until getCS() creates it.
69+
std::unique_ptr<LinearStream> currentStream;
70+
// Allocation read by the destructor as pairs of uint64_t (start, stop) values; freed in the destructor.
71+
GraphicsAllocation *timestamps;
// Offset into timestamps; the destructor divides it by 2 * sizeof(uint64_t) to get the number of pairs to report.
72+
uint32_t timestampsOffset;
73+
// Second page-sized allocation, zero-filled at construction and freed in the destructor.
74+
GraphicsAllocation *experimentalAllocation;
// Offset associated with experimentalAllocation; initialised to 0 (its use is outside this view).
75+
uint32_t experimentalAllocationOffset;
76+
// First argument passed to printDebugString in the destructor; initialised to true.
77+
bool defaultPrint;
// Device profilingTimerResolution, used to scale raw timestamp values before printing.
78+
double timerResolution;
79+
};
80+
81+
} // namespace OCLRT

0 commit comments

Comments
 (0)