Skip to content

Commit 91de6a0

Browse files
jfuentesigcbot
authored and committed
Update GRF selection heuristic
1 parent 0ab0f81 commit 91de6a0

File tree

3 files changed

+57
-25
lines changed

3 files changed

+57
-25
lines changed

visa/G4_Kernel.cpp

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -620,11 +620,11 @@ void G4_Kernel::calculateSimdSize() {
620620
// Updates kernel's related structures to large GRF
621621
//
622622
bool G4_Kernel::updateKernelToLargerGRF() {
623-
if (numRegTotal == grfMode.getVRTMaxGRF())
623+
if (numRegTotal == grfMode.getMaxGRF())
624624
return false;
625625

626626
// Scale number of GRFs, Acc, SWSB tokens.
627-
setKernelParameters(grfMode.getVRTLargerGRF());
627+
setKernelParameters(grfMode.moveToLargerGRF());
628628
fg.builder->rebuildPhyRegPool(getNumRegTotal());
629629
return true;
630630
}
@@ -633,11 +633,11 @@ bool G4_Kernel::updateKernelToLargerGRF() {
633633
// Updates kernel's related structures to smaller GRF
634634
//
635635
bool G4_Kernel::updateKernelToSmallerGRF() {
636-
if (numRegTotal == grfMode.getVRTMinGRF())
636+
if (numRegTotal == grfMode.getMinGRF())
637637
return false;
638638

639639
// Scale number of GRFs, Acc, SWSB tokens.
640-
setKernelParameters(grfMode.getVRTSmallerGRF());
640+
setKernelParameters(grfMode.moveToSmallerGRF());
641641
fg.builder->rebuildPhyRegPool(getNumRegTotal());
642642
return true;
643643
}
@@ -653,7 +653,7 @@ void G4_Kernel::updateKernelByRegPressure(unsigned regPressure) {
653653
largestInputReg = std::max(largestInputReg, maxRegPayloadDispatch);
654654
}
655655

656-
unsigned newGRF = grfMode.findModeByRegPressure(regPressure, largestInputReg);
656+
unsigned newGRF = grfMode.setModeByRegPressure(regPressure, largestInputReg);
657657

658658
if (newGRF == numRegTotal)
659659
return;
@@ -2099,25 +2099,27 @@ GRFMode::GRFMode(const TARGET_PLATFORM platform, Options *op) : options(op) {
20992099
currentMode = defaultMode;
21002100
}
21012101

2102-
unsigned GRFMode::findModeByRegPressure(unsigned maxRP, unsigned largestInputReg) {
2103-
unsigned size = configs.size();
2104-
unsigned i = 0, newGRF = 0;
2102+
unsigned GRFMode::setModeByRegPressure(unsigned maxRP, unsigned largestInputReg) {
2103+
unsigned size = configs.size(), i = 0;
21052104
// find appropiate GRF based on reg pressure
21062105
for (; i < size; i++) {
21072106
if (configs[i].VRTEnable && maxRP <= configs[i].numGRF &&
21082107
// Check that we've at least 8 GRFs over and above
21092108
// those blocked for kernel input. This helps cases
21102109
// where an 8 GRF variable shows up in entry BB.
21112110
(largestInputReg + 8) <= configs[i].numGRF) {
2112-
newGRF = configs[i].numGRF;
2111+
currentMode = i;
21132112
break;
21142113
}
21152114
}
2115+
return configs[currentMode].numGRF;
2116+
}
21162117

2117-
// if not found, pressure is too high
2118-
// set largest grf mode
2119-
if (i == size)
2120-
newGRF = getVRTMaxGRF();
2118+
// Check if next larger GRF has the same number of threads per EU
2119+
bool GRFMode::hasLargerGRFSameThreads() const {
2120+
unsigned largerGrfIdx = currentMode + 1;
2121+
if (largerGrfIdx == configs.size() || !configs[largerGrfIdx].VRTEnable)
2122+
return false;
21212123

2122-
return newGRF;
2124+
return configs[currentMode].numThreads == configs[largerGrfIdx].numThreads;
21232125
}

visa/G4_Kernel.hpp

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,8 @@ class GRFMode {
148148
return iter != configs.end();
149149
}
150150

151-
unsigned findModeByRegPressure(unsigned maxRP, unsigned largestInputReg);
151+
unsigned setModeByRegPressure(unsigned maxRP, unsigned largestInputReg);
152+
bool hasLargerGRFSameThreads() const;
152153

153154
unsigned getNumGRF() const { return configs[currentMode].numGRF; }
154155
unsigned getDefaultGRF() const { return configs[defaultMode].numGRF; }
@@ -170,19 +171,20 @@ class GRFMode {
170171
unsigned getNumAcc() const { return configs[currentMode].numAcc; }
171172

172173
// ----- helper functions for autoGRFSelection (VRT) ----- //
173-
unsigned getVRTMinGRF() const {
174+
unsigned getMinGRF() const {
174175
auto found = std::find_if(configs.begin(), configs.end(),
175176
[](const Config &c) { return c.VRTEnable; });
176177
return found->numGRF;
177178
}
178179

179-
unsigned getVRTMaxGRF() const {
180+
unsigned getMaxGRF() const {
180181
auto found = std::find_if(configs.rbegin(), configs.rend(),
181182
[](const Config &c) { return c.VRTEnable; });
182183
return found->numGRF;
183184
}
184185

185-
unsigned getVRTLargerGRF() const {
186+
// Get the next larger GRF available
187+
unsigned getLargerGRF() const {
186188
// find the first larger mode that's available for VRT
187189
for (auto i = currentMode + 1; i < configs.size(); ++i) {
188190
if (configs[i].VRTEnable)
@@ -191,14 +193,37 @@ class GRFMode {
191193
return configs[currentMode].numGRF;
192194
}
193195

194-
unsigned getVRTSmallerGRF() const {
196+
// Get the next smaller GRF available
197+
unsigned getSmallerGRF() const {
195198
for (auto i = static_cast<int>(currentMode) - 1; i >= 0 ; --i) {
196199
if (configs[i].VRTEnable)
197200
return configs[i].numGRF;
198201
}
199202
return configs[currentMode].numGRF;
200203
}
201204

205+
// Move GRF mode to the larger GRF available and return the number
206+
unsigned moveToLargerGRF() {
207+
for (auto i = currentMode + 1; i < configs.size(); ++i) {
208+
if (configs[i].VRTEnable) {
209+
currentMode = i;
210+
break;
211+
}
212+
}
213+
return configs[currentMode].numGRF;
214+
}
215+
216+
// Move GRF mode to the smaller GRF available and return the number
217+
unsigned moveToSmallerGRF() {
218+
for (auto i = currentMode - 1; i >= 0; --i) {
219+
if (configs[i].VRTEnable) {
220+
currentMode = i;
221+
break;
222+
}
223+
}
224+
return configs[currentMode].numGRF;
225+
}
226+
202227
private:
203228
// Parameters associated to a GRF mode
204229
struct Config {

visa/LocalScheduler/G4_Sched.cpp

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -715,19 +715,24 @@ bool preRA_Scheduler::runWithGRFSelection(unsigned &KernelPressure) {
715715
unsigned BBRP = rp.getPressure(bb);
716716

717717
unsigned UpperBoundGRF = 0;
718-
if (GRFdecreased && KernelPressure < kernel.grfMode.getVRTMaxGRF())
719-
UpperBoundGRF = kernel.grfMode.getVRTLargerGRF();
718+
if (GRFdecreased && KernelPressure < kernel.grfMode.getMaxGRF())
719+
UpperBoundGRF = kernel.grfMode.getLargerGRF();
720720
Changed |= S.scheduleBlockForLatency(BBRP, Changed, UpperBoundGRF);
721721
}
722722

723723
if (Changed) {
724724
rp.recompute();
725725
KernelPressure = rp.getMaxRP();
726726
}
727-
// In RA extra registers might be needed to satisfy
728-
// some restrictions, e.g. alignment, SIMD size, etc.
729-
// So extra registers are provided.
730-
unsigned ExtraRegs = (unsigned)(kernel.getNumRegTotal() * EXTRA_REGISTERS_FOR_RA / 100.0f);
727+
728+
unsigned ExtraRegs = 0;
729+
if (kernel.grfMode.hasLargerGRFSameThreads()) {
730+
// In RA extra registers might be needed to satisfy some restrictions,
731+
// e.g. alignment, SIMD size, etc. So in order to avoid spill in GRF
732+
// modes smaller than default, extra registers are added to reg pressure.
733+
ExtraRegs =
734+
(unsigned)(kernel.getNumRegTotal() * EXTRA_REGISTERS_FOR_RA / 100.0f);
735+
}
731736
kernel.updateKernelByRegPressure(KernelPressure + ExtraRegs);
732737

733738
return Changed;

0 commit comments

Comments
 (0)