Skip to content

Commit 807d188

Browse files
DianaChenigcbot
authored andcommitted
Add some IGC and vISA testing support
- Add IGC flag "DisableConstBaseGlobalBaseArg" to force relocations instead of the implicit args constBase and globalBase, for testing purposes - Introduce "bool VRTEnable" in GRFMode::Config to indicate whether the Config can participate in VRT (regSharingHeuristics)
1 parent 17660da commit 807d188

File tree

6 files changed

+66
-29
lines changed

6 files changed

+66
-29
lines changed

IGC/Compiler/Optimizer/OpenCLPasses/ProgramScopeConstants/ProgramScopeConstantAnalysis.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -264,12 +264,18 @@ bool ProgramScopeConstantAnalysis::runOnModule(Module& M)
264264
// Stackcalls:
265265
// Stackcall ABI does not allow implicit args, so rely on relocation for global variable access
266266

267+
// TODO: Plan to disable ConstBase and GlobalBase implicit arguments on PVC+.
268+
// StatelessToStateful pass is not enabled for most of the cases so there
269+
// is no benefit to add the implicit arguments. Const/Global variables
270+
// access can just go through relocations.
271+
267272
// Workaround: When there is stringConstants in the module, do not insert
268273
// implicit arguments to prevent const vars getting promoted
269274
// at statelessToStateful pass. In zebin path, stateful promotion
270275
// of const vars can't work well with printf strings.
271276
bool skipConstAndGlobalBaseArgs =
272-
Ctx->enableZEBinary() && !m_pModuleMd->stringConstants.empty();
277+
IGC_IS_FLAG_ENABLED(DisableConstBaseGlobalBaseArg) ||
278+
(Ctx->enableZEBinary() && !m_pModuleMd->stringConstants.empty());
273279

274280
if (!skipConstAndGlobalBaseArgs && hasInlineConstantBuffer)
275281
{

IGC/common/igc_flags.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,7 @@ DECLARE_IGC_REGKEY(bool, EnablePlatformFenceOpt, true, "Force DG2 only f
227227
DECLARE_IGC_REGKEY(bool, EnableSLMConstProp, true, "Enable SLM constant propagation (compute shader only).", false)
228228
DECLARE_IGC_REGKEY(bool, EnableStatelessToStateful, true, "Enable Stateless To Stateful transformation for global and constant address space in OpenCL kernels", false)
229229
DECLARE_IGC_REGKEY(bool, EnableStatefulToken, true, "Enable generating patch token to indicate a ptr argument is fully converted to stateful (temporary)", false)
230+
DECLARE_IGC_REGKEY(bool, DisableConstBaseGlobalBaseArg, false, "Do not generate kernel implicit arguments: constBase and globalBase", false)
230231
DECLARE_IGC_REGKEY(bool, EnableGenUpdateCB, false, "Enable derived constant optimization.", false)
231232
DECLARE_IGC_REGKEY(bool, EnableGenUpdateCBResInfo, false, "Enable derived constant optimization with resinfo.", false)
232233
DECLARE_IGC_REGKEY(bool, EnableHighestSIMDForNoSpill, false, "When there is no spill choose highest SIMD (compute shader only).", false)

visa/G4_Kernel.cpp

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -620,11 +620,11 @@ void G4_Kernel::calculateSimdSize() {
620620
// Updates kernel's related structures to large GRF
621621
//
622622
bool G4_Kernel::updateKernelToLargerGRF() {
623-
if (numRegTotal == grfMode.getMaxGRF())
623+
if (numRegTotal == grfMode.getVRTMaxGRF())
624624
return false;
625625

626626
// Scale number of GRFs, Acc, SWSB tokens.
627-
setKernelParameters(grfMode.getLargerGRF());
627+
setKernelParameters(grfMode.getVRTLargerGRF());
628628
fg.builder->rebuildPhyRegPool(getNumRegTotal());
629629
return true;
630630
}
@@ -633,11 +633,11 @@ bool G4_Kernel::updateKernelToLargerGRF() {
633633
// Updates kernel's related structures to smaller GRF
634634
//
635635
bool G4_Kernel::updateKernelToSmallerGRF() {
636-
if (numRegTotal == grfMode.getMinGRF())
636+
if (numRegTotal == grfMode.getVRTMinGRF())
637637
return false;
638638

639639
// Scale number of GRFs, Acc, SWSB tokens.
640-
setKernelParameters(grfMode.getSmallerGRF());
640+
setKernelParameters(grfMode.getVRTSmallerGRF());
641641
fg.builder->rebuildPhyRegPool(getNumRegTotal());
642642
return true;
643643
}
@@ -1739,6 +1739,7 @@ void G4_Kernel::emitDeviceAsmInstructionsIga(std::ostream &os,
17391739
return;
17401740

17411741
iga_gen_t igaPlatform = getIGAPlatform(getPlatform());
1742+
17421743
const iga::Model *igaModel =
17431744
iga::Model::LookupModel(iga::ToPlatform(igaPlatform));
17441745
iga::SWSB_ENCODE_MODE swsbEncodeMode = igaModel->getSWSBEncodeMode();
@@ -2048,23 +2049,24 @@ unsigned G4_Kernel::getComputeFFIDGP1NextOff() const {
20482049

20492050

20502051
// GRF modes supported by HW
2052+
// There must be at least one Config that is VRTEnable for each platform
20512053
GRFMode::GRFMode(const TARGET_PLATFORM platform, Options *op) : options(op) {
20522054
switch (platform) {
20532055
case Xe_XeHPSDV:
20542056
case Xe_DG2:
20552057
case Xe_MTL:
20562058
configs.resize(2);
20572059
// Configurations with <numGRF, numThreads, SWSBTokens, numAcc>
2058-
configs[0] = {128, 8, 16, 4};
2059-
configs[1] = {256, 4, 16, 8};
2060+
configs[0] = Config(128, 8, 16, 4);
2061+
configs[1] = Config(256, 4, 16, 8);
20602062
defaultMode = 0;
20612063
break;
20622064
case Xe_PVC:
20632065
case Xe_PVCXT:
20642066
configs.resize(2);
20652067
// Configurations with <numGRF, numThreads, SWSBTokens, numAcc>
2066-
configs[0] = {128, 8, 16, 4};
2067-
configs[1] = {256, 4, 32, 8};
2068+
configs[0] = Config(128, 8, 16, 4);
2069+
configs[1] = Config(256, 4, 32, 8);
20682070
defaultMode = 0;
20692071
break;
20702072
default:
@@ -2082,7 +2084,8 @@ unsigned GRFMode::findModeByRegPressure(unsigned maxRP, unsigned largestInputReg
20822084
unsigned i = 0, newGRF = 0;
20832085
// find appropriate GRF based on reg pressure
20842086
for (; i < size; i++) {
2085-
if (maxRP <= configs[i].numGRF && largestInputReg <= configs[i].numGRF) {
2087+
if (configs[i].VRTEnable && maxRP <= configs[i].numGRF &&
2088+
largestInputReg <= configs[i].numGRF) {
20862089
newGRF = configs[i].numGRF;
20872090
break;
20882091
}
@@ -2091,7 +2094,7 @@ unsigned GRFMode::findModeByRegPressure(unsigned maxRP, unsigned largestInputReg
20912094
// if not found, pressure is too high
20922095
// set largest grf mode
20932096
if (i == size)
2094-
newGRF = getMaxGRF();
2097+
newGRF = getVRTMaxGRF();
20952098

20962099
return newGRF;
20972100
}

visa/G4_Kernel.hpp

Lines changed: 39 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ SPDX-License-Identifier: MIT
1616
#include "include/gtpin_IGC_interface.h"
1717
#include "Assertions.h"
1818

19+
#include <algorithm>
1920
#include <cstdint>
2021
#include <iostream>
2122
#include <map>
@@ -150,20 +151,8 @@ class GRFMode {
150151
unsigned findModeByRegPressure(unsigned maxRP, unsigned largestInputReg);
151152

152153
unsigned getNumGRF() const { return configs[currentMode].numGRF; }
153-
unsigned getMinGRF() const { return configs[0].numGRF; }
154-
unsigned getMaxGRF() const {
155-
return configs[configs.size() - 1].numGRF;
156-
}
157154
unsigned getDefaultGRF() const { return configs[defaultMode].numGRF; }
158155
void setDefaultGRF() { currentMode = defaultMode; }
159-
unsigned getLargerGRF() const {
160-
return currentMode + 1 < configs.size() ? configs[currentMode + 1].numGRF
161-
: configs[currentMode].numGRF;
162-
}
163-
unsigned getSmallerGRF() const {
164-
return (signed)currentMode - 1 >= 0 ? configs[currentMode - 1].numGRF
165-
: configs[currentMode].numGRF;
166-
}
167156

168157
unsigned getNumThreads() const { return configs[currentMode].numThreads; }
169158
unsigned getMinNumThreads() const {
@@ -180,13 +169,51 @@ class GRFMode {
180169

181170
unsigned getNumAcc() const { return configs[currentMode].numAcc; }
182171

172+
// ----- helper functions for regSharingHeuristics (VRT) ----- //
173+
unsigned getVRTMinGRF() const {
174+
auto found = std::find_if(configs.begin(), configs.end(),
175+
[](const Config &c) { return c.VRTEnable; });
176+
return found->numGRF;
177+
}
178+
179+
unsigned getVRTMaxGRF() const {
180+
auto found = std::find_if(configs.rbegin(), configs.rend(),
181+
[](const Config &c) { return c.VRTEnable; });
182+
return found->numGRF;
183+
}
184+
185+
unsigned getVRTLargerGRF() const {
186+
// find the first larger mode that's available for VRT
187+
for (auto i = currentMode + 1; i < configs.size(); ++i) {
188+
if (configs[i].VRTEnable)
189+
return configs[i].numGRF;
190+
}
191+
return configs[currentMode].numGRF;
192+
}
193+
194+
unsigned getVRTSmallerGRF() const {
195+
for (auto i = currentMode - 1; i >= 0 ; --i) {
196+
if (configs[i].VRTEnable)
197+
return configs[i].numGRF;
198+
}
199+
return configs[currentMode].numGRF;
200+
}
201+
183202
private:
184203
// Parameters associated to a GRF mode
185204
struct Config {
  // One GRF mode: <numGRF, numThreads, SWSBTokens, numAcc>.
  unsigned numGRF;
  unsigned numThreads;
  unsigned numSWSB;
  unsigned numAcc;
  // if the config can be used by regSharingHeuristics
  bool VRTEnable;

  // Default config: every count is zero and VRT participation is off.
  constexpr Config() : Config(0, 0, 0, 0, false) {}

  // Fully specified config. VRT participation is on unless the caller
  // passes enableForVRT = false.
  constexpr Config(unsigned NumGRF, unsigned NumThreads, unsigned NumSWSB,
                   unsigned NumAcc, bool enableForVRT = true)
      : numGRF(NumGRF), numThreads(NumThreads), numSWSB(NumSWSB),
        numAcc(NumAcc), VRTEnable(enableForVRT) {}
};
191218
// Vector configs maintains all the GRF modes available for the platform
192219
// being compiled.

visa/LocalScheduler/G4_Sched.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -714,8 +714,8 @@ bool preRA_RegSharing::run(unsigned &KernelPressure) {
714714
unsigned BBRP = rp.getPressure(bb);
715715

716716
unsigned UpperBoundGRF = 0;
717-
if (GRFdecreased && KernelPressure < kernel.grfMode.getMaxGRF())
718-
UpperBoundGRF = kernel.grfMode.getLargerGRF();
717+
if (GRFdecreased && KernelPressure < kernel.grfMode.getVRTMaxGRF())
718+
UpperBoundGRF = kernel.grfMode.getVRTLargerGRF();
719719
Changed |= S.scheduleBlockForLatency(BBRP, Changed, UpperBoundGRF);
720720
}
721721

visa/PrologEpilog.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -357,16 +357,16 @@ class PayloadLoader
357357
r0(
358358
b.createHardwiredDeclare(
359359
k.numEltPerGRF<Type_UD>(), Type_UD, 0, 0)),
360-
rtmp(
361-
b.createHardwiredDeclare(
362-
k.numEltPerGRF<Type_UD>(), Type_UD,
363-
k.getNumRegTotal() - 1, 0)),
364360
perThreadLoadStartGRF(
365361
k.getOptions()->getuInt32Option(vISA_loadThreadPayloadStartReg)),
366362
numPerThreadGRF(
367363
AlignUp(k.getInt32KernelAttr(Attributes::ATTR_PerThreadInputSize),
368364
k.numEltPerGRF<Type_UB>()) / k.numEltPerGRF<Type_UB>())
369365
{
366+
367+
auto rtmpRegNum = k.getNumRegTotal() - 1;
368+
rtmp = b.createHardwiredDeclare(k.numEltPerGRF<Type_UD>(), Type_UD, rtmpRegNum, 0);
369+
370370
r0->setName("r0");
371371
rtmp->setName("rtmp");
372372

0 commit comments

Comments
 (0)