Skip to content

Commit c910a58

Browse files
Store bcsEngine in an std::array in OpenCL CommandQueue
Related-To: NEO-6057
Signed-off-by: Maciej Dziuban <[email protected]>
1 parent eccebfb commit c910a58

File tree

14 files changed

+111
-62
lines changed

14 files changed

+111
-62
lines changed

opencl/source/command_queue/command_queue.cpp

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ CommandQueue::CommandQueue(Context *context, ClDevice *device, const cl_queue_pr
8686
auto &neoDevice = device->getNearestGenericSubDevice(0)->getDevice();
8787
auto &selectorCopyEngine = neoDevice.getSelectorCopyEngine();
8888
auto bcsEngineType = EngineHelpers::getBcsEngineType(hwInfo, device->getDeviceBitfield(), selectorCopyEngine, internalUsage);
89-
bcsEngine = neoDevice.tryGetEngine(bcsEngineType, EngineUsage::Regular);
89+
bcsEngines[EngineHelpers::getBcsIndex(bcsEngineType)] = neoDevice.tryGetEngine(bcsEngineType, EngineUsage::Regular);
9090
bcsState.engineType = bcsEngineType;
9191
}
9292
}
@@ -113,9 +113,9 @@ CommandQueue::~CommandQueue() {
113113
device->getPerformanceCounters()->shutdown();
114114
}
115115

116-
if (bcsEngine) {
116+
if (auto mainBcs = bcsEngines[0]; mainBcs != nullptr) {
117117
auto &selectorCopyEngine = device->getNearestGenericSubDevice(0)->getSelectorCopyEngine();
118-
EngineHelpers::releaseBcsEngineType(bcsEngine->getEngineType(), selectorCopyEngine);
118+
EngineHelpers::releaseBcsEngineType(mainBcs->getEngineType(), selectorCopyEngine);
119119
}
120120
}
121121

@@ -133,28 +133,35 @@ CommandStreamReceiver &CommandQueue::getGpgpuCommandStreamReceiver() const {
133133
}
134134

135135
CommandStreamReceiver *CommandQueue::getBcsCommandStreamReceiver(aub_stream::EngineType bcsEngineType) const {
136-
if (bcsEngine) {
137-
UNRECOVERABLE_IF(bcsEngine->getEngineType() != bcsEngineType);
138-
return bcsEngine->commandStreamReceiver;
136+
const EngineControl *engine = this->bcsEngines[EngineHelpers::getBcsIndex(bcsEngineType)];
137+
if (engine == nullptr) {
138+
return nullptr;
139+
} else {
140+
return engine->commandStreamReceiver;
139141
}
140-
return nullptr;
141142
}
142143

143-
CommandStreamReceiver *CommandQueue::getBcsForAuxTranslation() const {
144-
if (bcsEngine) {
145-
return bcsEngine->commandStreamReceiver;
144+
CommandStreamReceiver *CommandQueue::getAnyBcs() const {
145+
for (const EngineControl *engine : this->bcsEngines) {
146+
if (engine != nullptr) {
147+
return engine->commandStreamReceiver;
148+
}
146149
}
147150
return nullptr;
148151
}
149152

153+
CommandStreamReceiver *CommandQueue::getBcsForAuxTranslation() const {
154+
return getAnyBcs();
155+
}
156+
150157
CommandStreamReceiver &CommandQueue::selectCsrForBuiltinOperation(const CsrSelectionArgs &args) const {
151158
const bool blitAllowed = blitEnqueueAllowed(args);
152159
const bool blitPreferred = blitEnqueuePreferred(args);
153160
const bool blitRequired = isCopyOnly;
154161
const bool blit = blitAllowed && (blitPreferred || blitRequired);
155162

156163
if (blit) {
157-
return *bcsEngine->commandStreamReceiver;
164+
return *getAnyBcs();
158165
} else {
159166
return getGpgpuCommandStreamReceiver();
160167
}
@@ -623,12 +630,12 @@ cl_uint CommandQueue::getQueueFamilyIndex() const {
623630
}
624631

625632
void CommandQueue::updateBcsTaskCount(aub_stream::EngineType bcsEngineType, uint32_t newBcsTaskCount) {
626-
UNRECOVERABLE_IF(bcsEngine->getEngineType() != bcsEngineType);
633+
UNRECOVERABLE_IF(getAnyBcs()->getOsContext().getEngineType() != bcsEngineType);
627634
this->bcsState.taskCount = newBcsTaskCount;
628635
}
629636

630637
uint32_t CommandQueue::peekBcsTaskCount(aub_stream::EngineType bcsEngineType) const {
631-
UNRECOVERABLE_IF(bcsEngine->getEngineType() != bcsEngineType);
638+
UNRECOVERABLE_IF(getAnyBcs()->getOsContext().getEngineType() != bcsEngineType);
632639
return this->bcsState.taskCount;
633640
}
634641

@@ -733,7 +740,7 @@ bool CommandQueue::queueDependenciesClearRequired() const {
733740
}
734741

735742
bool CommandQueue::blitEnqueueAllowed(const CsrSelectionArgs &args) const {
736-
if (bcsEngine == nullptr) {
743+
if (getAnyBcs() == nullptr) {
737744
return false;
738745
}
739746

@@ -878,7 +885,8 @@ void CommandQueue::overrideEngine(aub_stream::EngineType engineType, EngineUsage
878885
const bool isEngineCopyOnly = hwHelper.isCopyOnlyEngineType(engineGroupType);
879886

880887
if (isEngineCopyOnly) {
881-
bcsEngine = &device->getEngine(engineType, EngineUsage::Regular);
888+
std::fill(bcsEngines.begin(), bcsEngines.end(), nullptr);
889+
bcsEngines[EngineHelpers::getBcsIndex(engineType)] = &device->getEngine(engineType, EngineUsage::Regular);
882890
bcsState.engineType = engineType;
883891
timestampPacketContainer = std::make_unique<TimestampPacketContainer>();
884892
deferredTimestampPackets = std::make_unique<TimestampPacketContainer>();

opencl/source/command_queue/command_queue.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
226226

227227
MOCKABLE_VIRTUAL CommandStreamReceiver &getGpgpuCommandStreamReceiver() const;
228228
CommandStreamReceiver *getBcsCommandStreamReceiver(aub_stream::EngineType bcsEngineType) const;
229+
CommandStreamReceiver *getAnyBcs() const;
229230
CommandStreamReceiver *getBcsForAuxTranslation() const;
230231
MOCKABLE_VIRTUAL CommandStreamReceiver &selectCsrForBuiltinOperation(const CsrSelectionArgs &args) const;
231232
Device &getDevice() const noexcept;
@@ -365,7 +366,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
365366
Context *context = nullptr;
366367
ClDevice *device = nullptr;
367368
EngineControl *gpgpuEngine = nullptr;
368-
EngineControl *bcsEngine = nullptr;
369+
std::array<EngineControl *, bcsInfoMaskSize> bcsEngines = {};
369370

370371
cl_command_queue_properties commandQueueProperties = 0;
371372
std::vector<uint64_t> propertiesVector;

opencl/source/command_queue/command_queue_hw.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,9 +92,9 @@ class CommandQueueHw : public CommandQueue {
9292

9393
gpgpuEngine->osContext->ensureContextInitialized();
9494
gpgpuEngine->commandStreamReceiver->initDirectSubmission(device->getDevice(), *gpgpuEngine->osContext);
95-
if (bcsEngine) {
96-
bcsEngine->osContext->ensureContextInitialized();
97-
bcsEngine->commandStreamReceiver->initDirectSubmission(device->getDevice(), *bcsEngine->osContext);
95+
if (const EngineControl *mainBcsEngine = bcsEngines[0]; mainBcsEngine != nullptr) {
96+
mainBcsEngine->osContext->ensureContextInitialized();
97+
mainBcsEngine->commandStreamReceiver->initDirectSubmission(device->getDevice(), *mainBcsEngine->osContext);
9898
}
9999
}
100100

opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ struct BlitEnqueueTests : public ::testing::Test {
104104
mockProgram->setAllowNonUniform(true);
105105

106106
gpgpuCsr = mockCmdQueue->gpgpuEngine->commandStreamReceiver;
107-
bcsCsr = mockCmdQueue->bcsEngine->commandStreamReceiver;
107+
bcsCsr = mockCmdQueue->bcsEngines[0]->commandStreamReceiver;
108108
}
109109

110110
template <typename FamilyType>

opencl/test/unit_test/command_queue/command_queue_tests.cpp

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -230,13 +230,13 @@ TEST(CommandQueue, givenDeviceWhenCreatingCommandQueueThenPickCsrFromDefaultEngi
230230

231231
struct CommandQueueWithBlitOperationsTests : public ::testing::TestWithParam<uint32_t> {};
232232

233-
TEST(CommandQueue, givenDeviceNotSupportingBlitOperationsWhenQueueIsCreatedThenDontRegisterBcsCsr) {
233+
TEST(CommandQueue, givenDeviceNotSupportingBlitOperationsWhenQueueIsCreatedThenDontRegisterAnyBcsCsrs) {
234234
HardwareInfo hwInfo = *defaultHwInfo;
235235
hwInfo.capabilityTable.blitterOperationsSupported = false;
236236
auto mockDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
237237
MockCommandQueue cmdQ(nullptr, mockDevice.get(), 0, false);
238238

239-
EXPECT_EQ(nullptr, cmdQ.bcsEngine);
239+
EXPECT_EQ(0u, cmdQ.countBcsEngines());
240240

241241
auto defaultCsr = mockDevice->getDefaultEngine().commandStreamReceiver;
242242
EXPECT_EQ(defaultCsr, &cmdQ.getGpgpuCommandStreamReceiver());
@@ -1150,8 +1150,8 @@ TEST(CommandQueue, givenCopyOnlyQueueWhenCallingBlitEnqueueAllowedThenReturnTrue
11501150
MockContext context{};
11511151
HardwareInfo *hwInfo = context.getDevice(0)->getRootDeviceEnvironment().getMutableHardwareInfo();
11521152
MockCommandQueue queue(&context, context.getDevice(0), 0, false);
1153-
if (!queue.bcsEngine) {
1154-
queue.bcsEngine = &context.getDevice(0)->getDefaultEngine();
1153+
if (queue.countBcsEngines() == 0) {
1154+
queue.bcsEngines[0] = &context.getDevice(0)->getDefaultEngine();
11551155
}
11561156
hwInfo->capabilityTable.blitterOperationsSupported = false;
11571157

@@ -1173,8 +1173,8 @@ TEST(CommandQueue, givenSimpleClCommandWhenCallingBlitEnqueueAllowedThenReturnCo
11731173
MockContext context{};
11741174

11751175
MockCommandQueue queue(&context, context.getDevice(0), 0, false);
1176-
if (!queue.bcsEngine) {
1177-
queue.bcsEngine = &context.getDevice(0)->getDefaultEngine();
1176+
if (queue.countBcsEngines() == 0) {
1177+
queue.bcsEngines[0] = &context.getDevice(0)->getDefaultEngine();
11781178
}
11791179

11801180
MultiGraphicsAllocation multiAlloc{1};
@@ -1205,8 +1205,8 @@ TEST(CommandQueue, givenImageTransferClCommandWhenCallingBlitEnqueueAllowedThenR
12051205

12061206
MockContext context{};
12071207
MockCommandQueue queue(&context, context.getDevice(0), 0, false);
1208-
if (!queue.bcsEngine) {
1209-
queue.bcsEngine = &context.getDevice(0)->getDefaultEngine();
1208+
if (queue.countBcsEngines() == 0) {
1209+
queue.bcsEngines[0] = &context.getDevice(0)->getDefaultEngine();
12101210
}
12111211

12121212
MockImageBase image{};
@@ -1232,8 +1232,8 @@ TEST(CommandQueue, givenImageTransferClCommandWhenCallingBlitEnqueueAllowedThenR
12321232
TEST(CommandQueue, givenImageToBufferClCommandWhenCallingBlitEnqueueAllowedThenReturnCorrectValue) {
12331233
MockContext context{};
12341234
MockCommandQueue queue(&context, context.getDevice(0), 0, false);
1235-
if (!queue.bcsEngine) {
1236-
queue.bcsEngine = &context.getDevice(0)->getDefaultEngine();
1235+
if (queue.countBcsEngines() == 0) {
1236+
queue.bcsEngines[0] = &context.getDevice(0)->getDefaultEngine();
12371237
}
12381238

12391239
MultiGraphicsAllocation multiAlloc{1};
@@ -1796,6 +1796,7 @@ struct CopyOnlyQueueTests : ::testing::Test {
17961796
TEST_F(CopyOnlyQueueTests, givenBcsSelectedWhenCreatingCommandQueueThenItIsCopyOnly) {
17971797
MockCommandQueue queue{context.get(), clDevice.get(), properties, false};
17981798
EXPECT_EQ(bcsEngine->commandStreamReceiver, queue.getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS));
1799+
EXPECT_EQ(1u, queue.countBcsEngines());
17991800
EXPECT_NE(nullptr, queue.timestampPacketContainer);
18001801
EXPECT_TRUE(queue.isCopyOnly);
18011802
}

opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,7 @@ HWTEST_F(EnqueueHandlerTest, givenBlitPropertyWhenEnqueueIsBlockedThenRegisterBl
207207
Surface *surfaces[] = {nullptr};
208208
mockCmdQ->enqueueBlocked(CL_COMMAND_READ_BUFFER, surfaces, size_t(0), multiDispatchInfo, timestampPacketDependencies,
209209
blockedCommandsData, enqueuePropertiesForBlitEnqueue, eventsRequest,
210-
eventBuilder, std::unique_ptr<PrintfHandler>(nullptr), mockCmdQ->bcsEngine->commandStreamReceiver);
210+
eventBuilder, std::unique_ptr<PrintfHandler>(nullptr), mockCmdQ->getAnyBcs());
211211
EXPECT_TRUE(blockedCommandsDataForBlitEnqueue->blitEnqueue);
212212
EXPECT_EQ(blitProperties.srcAllocation, blockedCommandsDataForBlitEnqueue->blitPropertiesContainer.begin()->srcAllocation);
213213
EXPECT_EQ(blitProperties.dstAllocation, blockedCommandsDataForBlitEnqueue->blitPropertiesContainer.begin()->dstAllocation);
@@ -270,10 +270,11 @@ HWTEST_F(DispatchFlagsTests, givenBlitEnqueueWhenDispatchingCommandsWithoutKerne
270270
auto mockCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), nullptr);
271271
auto mockCsr = static_cast<CsrType *>(&mockCmdQ->getGpgpuCommandStreamReceiver());
272272
mockCsr->skipBlitCalls = true;
273-
mockCmdQ->bcsEngine = mockCmdQ->gpgpuEngine;
273+
mockCmdQ->clearBcsEngines();
274+
mockCmdQ->bcsEngines[0] = mockCmdQ->gpgpuEngine;
274275
cl_int retVal = CL_SUCCESS;
275276
auto buffer = std::unique_ptr<Buffer>(Buffer::create(context.get(), 0, 1, nullptr, retVal));
276-
auto &bcsCsr = *mockCmdQ->bcsEngine->commandStreamReceiver;
277+
auto &bcsCsr = *mockCmdQ->bcsEngines[0]->commandStreamReceiver;
277278

278279
auto blocking = true;
279280
TimestampPacketDependencies timestampPacketDependencies;
@@ -315,10 +316,11 @@ HWTEST_F(DispatchFlagsTests, givenN1EnabledWhenDispatchingWithoutKernelThenAllow
315316
auto mockCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), nullptr);
316317
auto mockCsr = static_cast<CsrType *>(&mockCmdQ->getGpgpuCommandStreamReceiver());
317318
mockCsr->skipBlitCalls = true;
318-
mockCmdQ->bcsEngine = mockCmdQ->gpgpuEngine;
319+
mockCmdQ->clearBcsEngines();
320+
mockCmdQ->bcsEngines[0] = mockCmdQ->gpgpuEngine;
319321
cl_int retVal = CL_SUCCESS;
320322
auto buffer = std::unique_ptr<Buffer>(Buffer::create(context.get(), 0, 1, nullptr, retVal));
321-
auto &bcsCsr = *mockCmdQ->bcsEngine->commandStreamReceiver;
323+
auto &bcsCsr = *mockCmdQ->bcsEngines[0]->commandStreamReceiver;
322324

323325
TimestampPacketDependencies timestampPacketDependencies;
324326
EventsRequest eventsRequest(0, nullptr, nullptr);

opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -783,12 +783,12 @@ typedef Test<EnqueueKernelFixture> EnqueueKernelTest;
783783

784784
struct EnqueueAuxKernelTests : public EnqueueKernelTest {
785785
template <typename FamilyType>
786-
class MyCmdQ : public CommandQueueHw<FamilyType> {
786+
class MyCmdQ : public MockCommandQueueHw<FamilyType> {
787787
public:
788788
using CommandQueueHw<FamilyType>::commandStream;
789789
using CommandQueueHw<FamilyType>::gpgpuEngine;
790-
using CommandQueueHw<FamilyType>::bcsEngine;
791-
MyCmdQ(Context *context, ClDevice *device) : CommandQueueHw<FamilyType>(context, device, nullptr, false) {}
790+
using CommandQueueHw<FamilyType>::bcsEngines;
791+
MyCmdQ(Context *context, ClDevice *device) : MockCommandQueueHw<FamilyType>(context, device, nullptr) {}
792792
void dispatchAuxTranslationBuiltin(MultiDispatchInfo &multiDispatchInfo, AuxTranslationDirection auxTranslationDirection) override {
793793
CommandQueueHw<FamilyType>::dispatchAuxTranslationBuiltin(multiDispatchInfo, auxTranslationDirection);
794794
auxTranslationDirections.push_back(auxTranslationDirection);
@@ -803,7 +803,7 @@ struct EnqueueAuxKernelTests : public EnqueueKernelTest {
803803

804804
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override {
805805
waitCalled++;
806-
CommandQueueHw<FamilyType>::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep);
806+
MockCommandQueueHw<FamilyType>::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep);
807807
}
808808

809809
std::vector<AuxTranslationDirection> auxTranslationDirections;
@@ -935,7 +935,8 @@ HWTEST_F(EnqueueAuxKernelTests, givenDebugVariableDisablingBuiltinTranslationWhe
935935

936936
MockKernelWithInternals mockKernel(*pClDevice, context);
937937
MyCmdQ<FamilyType> cmdQ(context, pClDevice);
938-
cmdQ.bcsEngine = cmdQ.gpgpuEngine;
938+
cmdQ.clearBcsEngines();
939+
cmdQ.bcsEngines[0] = cmdQ.gpgpuEngine;
939940

940941
hwInfo->capabilityTable.blitterOperationsSupported = true;
941942

opencl/test/unit_test/command_stream/tbx_command_stream_tests.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -899,7 +899,8 @@ HWTEST_F(TbxCommandStreamTests, givenTbxCsrWhenDispatchBlitEnqueueThenProcessCor
899899

900900
MockCommandQueueHw<FamilyType> cmdQ(&context, pClDevice, nullptr);
901901
cmdQ.gpgpuEngine = &engineControl0;
902-
cmdQ.bcsEngine = &engineControl1;
902+
cmdQ.clearBcsEngines();
903+
cmdQ.bcsEngines[0] = &engineControl1;
903904

904905
cl_int error = CL_SUCCESS;
905906
std::unique_ptr<Buffer> buffer(Buffer::create(&context, 0, 1, nullptr, error));

opencl/test/unit_test/event/event_tests.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,12 +143,12 @@ TEST(Event, givenBcsCsrSetInEventWhenPeekingBcsTaskCountThenReturnCorrectTaskCou
143143
new MockClDevice{MockDevice::createWithNewExecutionEnvironment<MockAlignedMallocManagerDevice>(&hwInfo)}};
144144
MockContext context{device.get()};
145145
MockCommandQueue queue{context};
146-
queue.updateBcsTaskCount(queue.bcsEngine->getEngineType(), 19);
146+
queue.updateBcsTaskCount(queue.bcsEngines[0]->getEngineType(), 19);
147147
Event event{&queue, CL_COMMAND_READ_BUFFER, 0, 0};
148148

149149
EXPECT_EQ(0u, event.peekBcsTaskCountFromCommandQueue());
150150

151-
event.setupBcs(queue.getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS)->getOsContext().getEngineType());
151+
event.setupBcs(queue.bcsEngines[0]->getEngineType());
152152
EXPECT_EQ(19u, event.peekBcsTaskCountFromCommandQueue());
153153
}
154154

0 commit comments

Comments
 (0)