Skip to content

Commit 95d1220

Browse files
jfuentesfda0
authored and committed
Update GRF selection heuristic
(cherry picked from commit 91de6a0)
1 parent 3217b81 commit 95d1220

File tree

3 files changed

+57
-25
lines changed

3 files changed

+57
-25
lines changed

visa/G4_Kernel.cpp

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -620,11 +620,11 @@ void G4_Kernel::calculateSimdSize() {
620620
// Updates kernel's related structures to the next larger GRF
621621
//
622622
bool G4_Kernel::updateKernelToLargerGRF() {
623-
if (numRegTotal == grfMode.getVRTMaxGRF())
623+
if (numRegTotal == grfMode.getMaxGRF())
624624
return false;
625625

626626
// Scale number of GRFs, Acc, SWSB tokens.
627-
setKernelParameters(grfMode.getVRTLargerGRF());
627+
setKernelParameters(grfMode.moveToLargerGRF());
628628
fg.builder->rebuildPhyRegPool(getNumRegTotal());
629629
return true;
630630
}
@@ -633,11 +633,11 @@ bool G4_Kernel::updateKernelToLargerGRF() {
633633
// Updates kernel's related structures to smaller GRF
634634
//
635635
bool G4_Kernel::updateKernelToSmallerGRF() {
636-
if (numRegTotal == grfMode.getVRTMinGRF())
636+
if (numRegTotal == grfMode.getMinGRF())
637637
return false;
638638

639639
// Scale number of GRFs, Acc, SWSB tokens.
640-
setKernelParameters(grfMode.getVRTSmallerGRF());
640+
setKernelParameters(grfMode.moveToSmallerGRF());
641641
fg.builder->rebuildPhyRegPool(getNumRegTotal());
642642
return true;
643643
}
@@ -653,7 +653,7 @@ void G4_Kernel::updateKernelByRegPressure(unsigned regPressure) {
653653
largestInputReg = std::max(largestInputReg, maxRegPayloadDispatch);
654654
}
655655

656-
unsigned newGRF = grfMode.findModeByRegPressure(regPressure, largestInputReg);
656+
unsigned newGRF = grfMode.setModeByRegPressure(regPressure, largestInputReg);
657657

658658
if (newGRF == numRegTotal)
659659
return;
@@ -2084,25 +2084,27 @@ GRFMode::GRFMode(const TARGET_PLATFORM platform, Options *op) : options(op) {
20842084
currentMode = defaultMode;
20852085
}
20862086

2087-
unsigned GRFMode::findModeByRegPressure(unsigned maxRP, unsigned largestInputReg) {
2088-
unsigned size = configs.size();
2089-
unsigned i = 0, newGRF = 0;
2087+
unsigned GRFMode::setModeByRegPressure(unsigned maxRP, unsigned largestInputReg) {
2088+
unsigned size = configs.size(), i = 0;
20902089
// find appropiate GRF based on reg pressure
20912090
for (; i < size; i++) {
20922091
if (configs[i].VRTEnable && maxRP <= configs[i].numGRF &&
20932092
// Check that we've at least 8 GRFs over and above
20942093
// those blocked for kernel input. This helps cases
20952094
// where an 8 GRF variable shows up in entry BB.
20962095
(largestInputReg + 8) <= configs[i].numGRF) {
2097-
newGRF = configs[i].numGRF;
2096+
currentMode = i;
20982097
break;
20992098
}
21002099
}
2100+
return configs[currentMode].numGRF;
2101+
}
21012102

2102-
// if not found, pressure is too high
2103-
// set largest grf mode
2104-
if (i == size)
2105-
newGRF = getVRTMaxGRF();
2103+
// Check if next larger GRF has the same number of threads per EU
2104+
bool GRFMode::hasLargerGRFSameThreads() const {
2105+
unsigned largerGrfIdx = currentMode + 1;
2106+
if (largerGrfIdx == configs.size() || !configs[largerGrfIdx].VRTEnable)
2107+
return false;
21062108

2107-
return newGRF;
2109+
return configs[currentMode].numThreads == configs[largerGrfIdx].numThreads;
21082110
}

visa/G4_Kernel.hpp

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,8 @@ class GRFMode {
148148
return iter != configs.end();
149149
}
150150

151-
unsigned findModeByRegPressure(unsigned maxRP, unsigned largestInputReg);
151+
unsigned setModeByRegPressure(unsigned maxRP, unsigned largestInputReg);
152+
bool hasLargerGRFSameThreads() const;
152153

153154
unsigned getNumGRF() const { return configs[currentMode].numGRF; }
154155
unsigned getDefaultGRF() const { return configs[defaultMode].numGRF; }
@@ -170,19 +171,20 @@ class GRFMode {
170171
unsigned getNumAcc() const { return configs[currentMode].numAcc; }
171172

172173
// ----- helper functions for autoGRFSelection (VRT) ----- //
173-
unsigned getVRTMinGRF() const {
174+
unsigned getMinGRF() const {
174175
auto found = std::find_if(configs.begin(), configs.end(),
175176
[](const Config &c) { return c.VRTEnable; });
176177
return found->numGRF;
177178
}
178179

179-
unsigned getVRTMaxGRF() const {
180+
unsigned getMaxGRF() const {
180181
auto found = std::find_if(configs.rbegin(), configs.rend(),
181182
[](const Config &c) { return c.VRTEnable; });
182183
return found->numGRF;
183184
}
184185

185-
unsigned getVRTLargerGRF() const {
186+
// Get the next larger GRF available
187+
unsigned getLargerGRF() const {
186188
// find the first larger mode that's available for VRT
187189
for (auto i = currentMode + 1; i < configs.size(); ++i) {
188190
if (configs[i].VRTEnable)
@@ -191,14 +193,37 @@ class GRFMode {
191193
return configs[currentMode].numGRF;
192194
}
193195

194-
unsigned getVRTSmallerGRF() const {
196+
// Get the next smaller GRF available
197+
unsigned getSmallerGRF() const {
195198
for (auto i = static_cast<int>(currentMode) - 1; i >= 0 ; --i) {
196199
if (configs[i].VRTEnable)
197200
return configs[i].numGRF;
198201
}
199202
return configs[currentMode].numGRF;
200203
}
201204

205+
// Move GRF mode to the larger GRF available and return the number
206+
unsigned moveToLargerGRF() {
207+
for (auto i = currentMode + 1; i < configs.size(); ++i) {
208+
if (configs[i].VRTEnable) {
209+
currentMode = i;
210+
break;
211+
}
212+
}
213+
return configs[currentMode].numGRF;
214+
}
215+
216+
// Move GRF mode to the smaller GRF available and return the number
217+
unsigned moveToSmallerGRF() {
218+
for (auto i = currentMode - 1; i >= 0; --i) {
219+
if (configs[i].VRTEnable) {
220+
currentMode = i;
221+
break;
222+
}
223+
}
224+
return configs[currentMode].numGRF;
225+
}
226+
202227
private:
203228
// Parameters associated to a GRF mode
204229
struct Config {

visa/LocalScheduler/G4_Sched.cpp

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -715,19 +715,24 @@ bool preRA_Scheduler::runWithGRFSelection(unsigned &KernelPressure) {
715715
unsigned BBRP = rp.getPressure(bb);
716716

717717
unsigned UpperBoundGRF = 0;
718-
if (GRFdecreased && KernelPressure < kernel.grfMode.getVRTMaxGRF())
719-
UpperBoundGRF = kernel.grfMode.getVRTLargerGRF();
718+
if (GRFdecreased && KernelPressure < kernel.grfMode.getMaxGRF())
719+
UpperBoundGRF = kernel.grfMode.getLargerGRF();
720720
Changed |= S.scheduleBlockForLatency(BBRP, Changed, UpperBoundGRF);
721721
}
722722

723723
if (Changed) {
724724
rp.recompute();
725725
KernelPressure = rp.getMaxRP();
726726
}
727-
// In RA extra registers might be needed to satisfy
728-
// some restrictions, e.g. alignment, SIMD size, etc.
729-
// So extra registers are provided.
730-
unsigned ExtraRegs = (unsigned)(kernel.getNumRegTotal() * EXTRA_REGISTERS_FOR_RA / 100.0f);
727+
728+
unsigned ExtraRegs = 0;
729+
if (kernel.grfMode.hasLargerGRFSameThreads()) {
730+
// In RA extra registers might be needed to satisfy some restrictions,
731+
// e.g. alignment, SIMD size, etc. So in order to avoid spill in GRF
732+
// modes smaller than default, extra registers are added to reg pressure.
733+
ExtraRegs =
734+
(unsigned)(kernel.getNumRegTotal() * EXTRA_REGISTERS_FOR_RA / 100.0f);
735+
}
731736
kernel.updateKernelByRegPressure(KernelPressure + ExtraRegs);
732737

733738
return Changed;

0 commit comments

Comments
 (0)