Skip to content

Commit e06aa17

Browse files
pwilma, Compute-Runtime-Automation
authored and committed
Grf configuration
Change-Id: I3741f53a38c6707b0c8ad82ae553ea65ae6917e4
Signed-off-by: Pawel Wilma <[email protected]>
1 parent a81b1a4 commit e06aa17

19 files changed

+73
-26
lines changed

runtime/command_queue/enqueue_common.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -534,6 +534,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
534534
}
535535

536536
auto mediaSamplerRequired = false;
537+
uint32_t numGrfRequired = GrfConfig::DefaultGrfNumber;
537538
Kernel *kernel = nullptr;
538539
for (auto &dispatchInfo : multiDispatchInfo) {
539540
if (kernel != dispatchInfo.getKernel()) {
@@ -544,6 +545,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
544545
kernel->makeResident(commandStreamReceiver);
545546
requiresCoherency |= kernel->requiresCoherency();
546547
mediaSamplerRequired |= kernel->isVmeKernel();
548+
auto numGrfRequiredByKernel = kernel->getKernelInfo().patchInfo.executionEnvironment->NumGRFRequired;
549+
numGrfRequired = std::max(numGrfRequired, numGrfRequiredByKernel);
547550
}
548551

549552
if (mediaSamplerRequired) {
@@ -593,7 +596,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
593596
if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) {
594597
dispatchFlags.outOfDeviceDependencies = &eventsRequest;
595598
}
596-
599+
dispatchFlags.numGrfRequired = numGrfRequired;
597600
DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);
598601

599602
if (gtpinIsGTPinInitialized()) {

runtime/command_stream/command_stream_receiver.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "runtime/helpers/completion_stamp.h"
1616
#include "runtime/helpers/flat_batch_buffer_helper.h"
1717
#include "runtime/helpers/options.h"
18+
#include "runtime/kernel/grf_config.h"
1819
#include "runtime/indirect_heap/indirect_heap.h"
1920
#include <cstddef>
2021
#include <cstdint>
@@ -179,6 +180,7 @@ class CommandStreamReceiver {
179180
int8_t lastMediaSamplerConfig = -1;
180181
PreemptionMode lastPreemptionMode = PreemptionMode::Initial;
181182
uint32_t latestSentStatelessMocsConfig = 0;
183+
uint32_t lastSentNumGrfRequired = GrfConfig::DefaultGrfNumber;
182184

183185
LinearStream commandStream;
184186

runtime/command_stream/command_stream_receiver_hw.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,9 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
5050
size_t getCmdSizeForPreemption(const DispatchFlags &dispatchFlags) const;
5151
size_t getCmdSizeForL3Config() const;
5252
size_t getCmdSizeForPipelineSelect() const;
53-
size_t getCmdSizeForCoherency();
53+
size_t getCmdSizeForComputeMode();
5454
size_t getCmdSizeForMediaSampler(bool mediaSamplerRequired) const;
55-
void programCoherency(LinearStream &csr, DispatchFlags &dispatchFlags);
55+
void programComputeMode(LinearStream &csr, DispatchFlags &dispatchFlags);
5656

5757
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, OsContext &osContext) override;
5858
const HardwareInfo &peekHwInfo() const { return hwInfo; }

runtime/command_stream/command_stream_receiver_hw.inl

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
228228
csrSizeRequestFlags.coherencyRequestChanged = this->lastSentCoherencyRequest != static_cast<int8_t>(dispatchFlags.requiresCoherency);
229229
csrSizeRequestFlags.preemptionRequestChanged = this->lastPreemptionMode != dispatchFlags.preemptionMode;
230230
csrSizeRequestFlags.mediaSamplerConfigChanged = this->lastMediaSamplerConfig != static_cast<int8_t>(dispatchFlags.mediaSamplerRequired);
231+
csrSizeRequestFlags.numGrfRequiredChanged = this->lastSentNumGrfRequired != dispatchFlags.numGrfRequired;
231232

232233
size_t requiredScratchSizeInBytes = requiredScratchSize * device.getDeviceInfo().computeUnitsUsedForScratch;
233234

@@ -255,7 +256,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
255256
}
256257
initPageTableManagerRegisters(commandStreamCSR);
257258
programPreemption(commandStreamCSR, device, dispatchFlags);
258-
programCoherency(commandStreamCSR, dispatchFlags);
259+
programComputeMode(commandStreamCSR, dispatchFlags);
259260
programL3(commandStreamCSR, dispatchFlags, newL3Config);
260261
programPipelineSelect(commandStreamCSR, dispatchFlags);
261262
programPreamble(commandStreamCSR, device, dispatchFlags, newL3Config);
@@ -628,7 +629,7 @@ size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamSize(const Dispat
628629
size += sizeof(typename GfxFamily::MI_BATCH_BUFFER_START);
629630

630631
size += getCmdSizeForL3Config();
631-
size += getCmdSizeForCoherency();
632+
size += getCmdSizeForComputeMode();
632633
size += getCmdSizeForMediaSampler(dispatchFlags.mediaSamplerRequired);
633634
size += getCmdSizeForPipelineSelect();
634635
size += getCmdSizeForPreemption(dispatchFlags);

runtime/command_stream/csr_definitions.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "runtime/memory_manager/memory_constants.h"
1010
#include "runtime/helpers/hw_info.h"
1111
#include "runtime/helpers/properties_helper.h"
12+
#include "runtime/kernel/grf_config.h"
1213
#include <limits>
1314

1415
namespace OCLRT {
@@ -43,6 +44,7 @@ struct DispatchFlags {
4344
FlushStampTrackingObj *flushStampReference = nullptr;
4445
PreemptionMode preemptionMode = PreemptionMode::Disabled;
4546
EventsRequest *outOfDeviceDependencies = nullptr;
47+
uint32_t numGrfRequired = GrfConfig::DefaultGrfNumber;
4648
};
4749

4850
struct CsrSizeRequestFlags {
@@ -51,5 +53,6 @@ struct CsrSizeRequestFlags {
5153
bool preemptionRequestChanged = false;
5254
bool mediaSamplerConfigChanged = false;
5355
bool hasSharedHandles = false;
56+
bool numGrfRequiredChanged = false;
5457
};
5558
} // namespace OCLRT

runtime/gen10/command_stream_receiver_hw_gen10.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,15 @@ typedef CNLFamily Family;
1616
static auto gfxCore = IGFX_GEN10_CORE;
1717

1818
template <>
19-
size_t CommandStreamReceiverHw<Family>::getCmdSizeForCoherency() {
19+
size_t CommandStreamReceiverHw<Family>::getCmdSizeForComputeMode() {
2020
if (csrSizeRequestFlags.coherencyRequestChanged) {
2121
return sizeof(typename Family::MI_LOAD_REGISTER_IMM);
2222
}
2323
return 0;
2424
}
2525

2626
template <>
27-
void CommandStreamReceiverHw<Family>::programCoherency(LinearStream &stream, DispatchFlags &dispatchFlags) {
27+
void CommandStreamReceiverHw<Family>::programComputeMode(LinearStream &stream, DispatchFlags &dispatchFlags) {
2828
if (csrSizeRequestFlags.coherencyRequestChanged) {
2929
LriHelper<Family>::program(&stream, gen10HdcModeRegisterAddresss, DwordBuilder::build(4, true, !dispatchFlags.requiresCoherency));
3030
this->lastSentCoherencyRequest = static_cast<int8_t>(dispatchFlags.requiresCoherency);

runtime/gen8/command_stream_receiver_hw_gen8.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,12 @@ typedef BDWFamily Family;
1515
static auto gfxCore = IGFX_GEN8_CORE;
1616

1717
template <>
18-
size_t CommandStreamReceiverHw<Family>::getCmdSizeForCoherency() {
18+
size_t CommandStreamReceiverHw<Family>::getCmdSizeForComputeMode() {
1919
return 0;
2020
}
2121

2222
template <>
23-
void CommandStreamReceiverHw<Family>::programCoherency(LinearStream &stream, DispatchFlags &dispatchFlags) {
23+
void CommandStreamReceiverHw<Family>::programComputeMode(LinearStream &stream, DispatchFlags &dispatchFlags) {
2424
}
2525

2626
template <>

runtime/gen9/command_stream_receiver_hw_gen9.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,12 @@ typedef SKLFamily Family;
1515
static auto gfxCore = IGFX_GEN9_CORE;
1616

1717
template <>
18-
size_t CommandStreamReceiverHw<Family>::getCmdSizeForCoherency() {
18+
size_t CommandStreamReceiverHw<Family>::getCmdSizeForComputeMode() {
1919
return 0;
2020
}
2121

2222
template <>
23-
void CommandStreamReceiverHw<Family>::programCoherency(LinearStream &stream, DispatchFlags &dispatchFlags) {
23+
void CommandStreamReceiverHw<Family>::programComputeMode(LinearStream &stream, DispatchFlags &dispatchFlags) {
2424
}
2525

2626
template <>

runtime/kernel/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,13 @@
77
set(RUNTIME_SRCS_KERNEL
88
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
99
${CMAKE_CURRENT_SOURCE_DIR}/dynamic_kernel_info.h
10+
${CMAKE_CURRENT_SOURCE_DIR}/grf_config.h
1011
${CMAKE_CURRENT_SOURCE_DIR}/image_transformer.cpp
1112
${CMAKE_CURRENT_SOURCE_DIR}/image_transformer.h
1213
${CMAKE_CURRENT_SOURCE_DIR}/kernel.cpp
1314
${CMAKE_CURRENT_SOURCE_DIR}/kernel.h
1415
${CMAKE_CURRENT_SOURCE_DIR}/kernel.inl
16+
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/kernel_reconfiguration.cpp
1517
)
1618
target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_KERNEL})
1719
set_property(GLOBAL PROPERTY RUNTIME_SRCS_KERNEL ${RUNTIME_SRCS_KERNEL})

runtime/kernel/grf_config.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
/*
2+
* Copyright (C) 2018 Intel Corporation
3+
*
4+
* SPDX-License-Identifier: MIT
5+
*
6+
*/
7+
8+
#pragma once

// uint32_t comes from <cstdint>; include it here so this header is
// self-contained and does not depend on the includer's include order.
#include <cstdint>

namespace GrfConfig {
// Default GRF count. Used as the initial value wherever a "number of GRFs
// required" field is declared (e.g. DispatchFlags::numGrfRequired).
constexpr uint32_t DefaultGrfNumber = 128u;
} // namespace GrfConfig

runtime/kernel/kernel.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,8 @@ cl_int Kernel::initialize() {
354354
program->allocateBlockPrivateSurfaces();
355355
}
356356

357+
reconfigureKernel();
358+
357359
retVal = CL_SUCCESS;
358360

359361
} while (false);

runtime/kernel/kernel.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -458,6 +458,8 @@ class Kernel : public BaseObject<_cl_kernel> {
458458

459459
void resolveArgs();
460460

461+
void reconfigureKernel();
462+
461463
Program *program;
462464
Context *context;
463465
const Device &device;
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
/*
2+
* Copyright (C) 2018 Intel Corporation
3+
*
4+
* SPDX-License-Identifier: MIT
5+
*
6+
*/
7+
8+
#include "runtime/kernel/kernel.h"
9+
10+
namespace OCLRT {
11+
// Reconfiguration hook called from Kernel::initialize().
// This implementation has an empty body, so calling it has no effect.
// NOTE(review): the build picks this file through ${BRANCH_DIR_SUFFIX}, so other
// branches presumably supply a non-empty variant — confirm.
void Kernel::reconfigureKernel() {
12+
}
13+
} // namespace OCLRT

runtime/program/kernel_info.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,5 +239,6 @@ struct KernelInfo {
239239
bool isKernelHeapSubstituted = false;
240240
GraphicsAllocation *kernelAllocation = nullptr;
241241
DebugData debugData;
242+
bool computeMode = false;
242243
};
243244
} // namespace OCLRT

runtime/program/process_gen_binary.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -535,7 +535,8 @@ cl_int Program::parsePatchList(KernelInfo &kernelInfo) {
535535
"\n .SubgroupIndependentForwardProgressRequired", kernelInfo.patchInfo.executionEnvironment->SubgroupIndependentForwardProgressRequired,
536536
"\n .WorkgroupWalkOrderDim0", kernelInfo.workgroupWalkOrder[0],
537537
"\n .WorkgroupWalkOrderDim1", kernelInfo.workgroupWalkOrder[1],
538-
"\n .WorkgroupWalkOrderDim2", kernelInfo.workgroupWalkOrder[2]);
538+
"\n .WorkgroupWalkOrderDim2", kernelInfo.workgroupWalkOrder[2],
539+
"\n .NumGRFRequired", kernelInfo.patchInfo.executionEnvironment->NumGRFRequired);
539540
break;
540541

541542
case PATCH_TOKEN_DATA_PARAMETER_STREAM:

unit_tests/gen10/coherency_tests_gen10.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -42,19 +42,19 @@ struct Gen10CoherencyRequirements : public ::testing::Test {
4242
GEN10TEST_F(Gen10CoherencyRequirements, coherencyCmdSize) {
4343
auto lriSize = sizeof(MI_LOAD_REGISTER_IMM);
4444
overrideCoherencyRequest(false, false);
45-
auto retSize = csr->getCmdSizeForCoherency();
45+
auto retSize = csr->getCmdSizeForComputeMode();
4646
EXPECT_EQ(0u, retSize);
4747

4848
overrideCoherencyRequest(false, true);
49-
retSize = csr->getCmdSizeForCoherency();
49+
retSize = csr->getCmdSizeForComputeMode();
5050
EXPECT_EQ(0u, retSize);
5151

5252
overrideCoherencyRequest(true, true);
53-
retSize = csr->getCmdSizeForCoherency();
53+
retSize = csr->getCmdSizeForComputeMode();
5454
EXPECT_EQ(lriSize, retSize);
5555

5656
overrideCoherencyRequest(true, false);
57-
retSize = csr->getCmdSizeForCoherency();
57+
retSize = csr->getCmdSizeForComputeMode();
5858
EXPECT_EQ(lriSize, retSize);
5959
}
6060

@@ -68,14 +68,14 @@ GEN10TEST_F(Gen10CoherencyRequirements, coherencyCmdValues) {
6868
expectedCmd.setDataDword(DwordBuilder::build(4, true));
6969

7070
overrideCoherencyRequest(true, false);
71-
csr->programCoherency(stream, flags);
71+
csr->programComputeMode(stream, flags);
7272
EXPECT_EQ(lriSize, stream.getUsed());
7373

7474
auto cmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(stream.getCpuBase());
7575
EXPECT_TRUE(memcmp(&expectedCmd, cmd, lriSize) == 0);
7676

7777
overrideCoherencyRequest(true, true);
78-
csr->programCoherency(stream, flags);
78+
csr->programComputeMode(stream, flags);
7979
EXPECT_EQ(lriSize * 2, stream.getUsed());
8080

8181
cmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(ptrOffset(stream.getCpuBase(), lriSize));

unit_tests/gen8/coherency_tests_gen8.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,14 @@ GEN8TEST_F(Gen8CoherencyRequirements, noCoherencyProgramming) {
1919
LinearStream stream;
2020
DispatchFlags flags = {};
2121

22-
auto retSize = csr.getCmdSizeForCoherency();
22+
auto retSize = csr.getCmdSizeForComputeMode();
2323
EXPECT_EQ(0u, retSize);
24-
csr.programCoherency(stream, flags);
24+
csr.programComputeMode(stream, flags);
2525
EXPECT_EQ(0u, stream.getUsed());
2626

2727
flags.requiresCoherency = true;
28-
retSize = csr.getCmdSizeForCoherency();
28+
retSize = csr.getCmdSizeForComputeMode();
2929
EXPECT_EQ(0u, retSize);
30-
csr.programCoherency(stream, flags);
30+
csr.programComputeMode(stream, flags);
3131
EXPECT_EQ(0u, stream.getUsed());
3232
}

unit_tests/gen9/coherency_tests_gen9.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,14 @@ GEN9TEST_F(Gen9CoherencyRequirements, noCoherencyProgramming) {
1919
LinearStream stream;
2020
DispatchFlags flags = {};
2121

22-
auto retSize = csr.getCmdSizeForCoherency();
22+
auto retSize = csr.getCmdSizeForComputeMode();
2323
EXPECT_EQ(0u, retSize);
24-
csr.programCoherency(stream, flags);
24+
csr.programComputeMode(stream, flags);
2525
EXPECT_EQ(0u, stream.getUsed());
2626

2727
flags.requiresCoherency = true;
28-
retSize = csr.getCmdSizeForCoherency();
28+
retSize = csr.getCmdSizeForComputeMode();
2929
EXPECT_EQ(0u, retSize);
30-
csr.programCoherency(stream, flags);
30+
csr.programComputeMode(stream, flags);
3131
EXPECT_EQ(0u, stream.getUsed());
3232
}

unit_tests/mocks/mock_kernel.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#pragma once
99

1010
#include "runtime/helpers/string.h"
11+
#include "runtime/kernel/grf_config.h"
1112
#include "runtime/kernel/kernel.h"
1213
#include "runtime/scheduler/scheduler_kernel.h"
1314
#include "runtime/device/device.h"
@@ -118,6 +119,7 @@ class MockKernel : public Kernel {
118119
SPatchExecutionEnvironment *executionEnvironment = new SPatchExecutionEnvironment;
119120
memset(executionEnvironment, 0, sizeof(SPatchExecutionEnvironment));
120121
executionEnvironment->HasDeviceEnqueue = 0;
122+
executionEnvironment->NumGRFRequired = GrfConfig::DefaultGrfNumber;
121123
info->patchInfo.executionEnvironment = executionEnvironment;
122124

123125
info->crossThreadData = new char[crossThreadSize];
@@ -243,6 +245,8 @@ class MockKernelWithInternals {
243245
memset(&executionEnvironment, 0, sizeof(SPatchExecutionEnvironment));
244246
memset(&executionEnvironmentBlock, 0, sizeof(SPatchExecutionEnvironment));
245247
memset(&dataParameterStream, 0, sizeof(SPatchDataParameterStream));
248+
executionEnvironment.NumGRFRequired = GrfConfig::DefaultGrfNumber;
249+
executionEnvironmentBlock.NumGRFRequired = GrfConfig::DefaultGrfNumber;
246250
kernelHeader.SurfaceStateHeapSize = sizeof(sshLocal);
247251
threadPayload.LocalIDXPresent = 1;
248252
threadPayload.LocalIDYPresent = 1;
@@ -324,6 +328,7 @@ class MockParentKernel : public Kernel {
324328
SPatchExecutionEnvironment *executionEnvironment = new SPatchExecutionEnvironment;
325329
*executionEnvironment = {};
326330
executionEnvironment->HasDeviceEnqueue = 1;
331+
executionEnvironment->NumGRFRequired = GrfConfig::DefaultGrfNumber;
327332
info->patchInfo.executionEnvironment = executionEnvironment;
328333

329334
SPatchAllocateStatelessDefaultDeviceQueueSurface *allocateDeviceQueue = new SPatchAllocateStatelessDefaultDeviceQueueSurface;

0 commit comments

Comments
 (0)