Skip to content

Commit a16125c

Browse files
MaciejKalinski authored and sys_zuul committed
NeedSplitting new checks and ScanReduce common code. 1. NeedSplitting: add extra checks for SIMD8 and lower execution sizes. They are needed to detect certain rare cases early and to indicate that the code must be corrected - either the code being developed, or the SplitVariable() function, which would have to support these new cases. 2. Add the ScanReducePrepareSrc() function, used in several places for src preparation in scan-reduce emitters.
Change-Id: I82d87c893f3ace8c326710759fd1f71e35b66bba
1 parent fa74a1a commit a16125c

File tree

4 files changed

+178
-103
lines changed

4 files changed

+178
-103
lines changed

IGC/Compiler/CISACodeGen/CISABuilder.cpp

Lines changed: 107 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,6 @@ using namespace llvm;
5959

6060
namespace IGC
6161
{
62-
6362
Common_ISA_Exec_Size getExecSize(SIMDMode width)
6463
{
6564
switch (width)
@@ -204,6 +203,67 @@ namespace IGC
204203
return static_cast<Common_ISA_SVM_Block_Num>(~0U);
205204
}
206205

206+
// Translate a vISA execution-size enumerator into the number of lanes it denotes.
// Handles EXEC_SIZE_1 through EXEC_SIZE_32. Any other value trips an assertion;
// when assertions are compiled out, 0 is returned for such a value.
constexpr unsigned visaNumLanes(Common_ISA_Exec_Size execSize)
{
    switch (execSize)
    {
    case EXEC_SIZE_1:
        return 1;
    case EXEC_SIZE_2:
        return 2;
    case EXEC_SIZE_4:
        return 4;
    case EXEC_SIZE_8:
        return 8;
    case EXEC_SIZE_16:
        return 16;
    case EXEC_SIZE_32:
        return 32;
    default:
        break;
    }

    // Unknown execution size: report it, then fall back to "no lanes".
    assert(false);
    return 0;
}
221+
222+
// Take certain attributes of either src or dst instruction operand and return the size
223+
// of the associated grf region, accessed during instruction's execution, in bytes.
224+
// If aligned==true, the size includes length of data block starting at the beginning of grf
225+
// and ending at the subReg; this is useful to check if the region crosses 2 grf boundary.
226+
// If special region attribute is not set, the regioning is <1; 1, 0> for src and <1> for dst.
227+
// Note that the assertions may hit in certain cases, which should be handled separately,
228+
// like uniform vars with operand with special region set.
229+
constexpr unsigned GrfRegionSize(Common_ISA_Exec_Size execSize, unsigned elementSize,
230+
const SModifier& mod, bool isSource, bool aligned = true)
231+
{
232+
constexpr unsigned grfSize = 32; // in bytes
233+
// If subReg is big enough to cross grf boundary, adjust it.
234+
const unsigned base = (mod.subReg * elementSize) % grfSize;
235+
unsigned lastInRegion = aligned ? base : 0;
236+
if (isSource)
237+
{
238+
// Formula based on algorithm provided in the spec (see Region Parameters)
239+
const unsigned vstride = mod.specialRegion ? mod.region[0] : 1;
240+
const unsigned width = mod.specialRegion ? mod.region[1] : 1;
241+
const unsigned hstride = mod.specialRegion ? mod.region[2] : 0;
242+
assert(width != 0);
243+
const unsigned height = visaNumLanes(execSize) / width;
244+
assert(height != 0);
245+
lastInRegion += (height - 1) * vstride * elementSize +
246+
(width - 1) * hstride * elementSize;
247+
}
248+
else
249+
{
250+
const unsigned hstride = mod.specialRegion ? mod.region[2] : 1;
251+
lastInRegion += (visaNumLanes(execSize) - 1) * hstride * elementSize;
252+
}
253+
return lastInRegion + elementSize;
254+
};
255+
// Compile-time ULTs for GrfRegionSize()
256+
static_assert(GrfRegionSize(EXEC_SIZE_16, 4, SModifier{}, false) == 64 &&
257+
GrfRegionSize(EXEC_SIZE_16, 4, SModifier{ 16, {}, {0,0,2}, {}, {}, true }, false) == 124 &&
258+
GrfRegionSize(EXEC_SIZE_16, 4, SModifier{ 15, {}, {0,0,2}, {}, {}, true }, false) == 124 + 7*4 &&
259+
GrfRegionSize(EXEC_SIZE_8, 8, SModifier{ 1, {}, {0,0,2}, {}, {}, true }, false) == 128,
260+
"GrfRegionSize compile-time test failed - dst.");
261+
static_assert(GrfRegionSize(EXEC_SIZE_16, 4, SModifier{}, true) == 64 &&
262+
GrfRegionSize(EXEC_SIZE_16, 4, SModifier{ {}, {}, {4,4,0}, {}, {}, true }, true) == 52 &&
263+
GrfRegionSize(EXEC_SIZE_8, 8, SModifier{ 8, {}, {2,1,0}, {}, {}, true }, true) == 120 &&
264+
GrfRegionSize(EXEC_SIZE_8, 8, SModifier{ 10, {}, {2,1,0}, {}, {}, true }, true) == 120 + 2*8,
265+
"GrfRegionSize compile-time test failed - src.");
266+
207267
// split a SIMD16 variable into two SIMD8 while satisfying vISA's raw operand alignment
208268
// return a tuple representing the vISA raw operand (var + offset) after split
209269
std::tuple<CVariable*, uint32_t> CEncoder::splitRawOperand(CVariable* var, bool isFirstHalf,
@@ -1023,37 +1083,55 @@ namespace IGC
10231083
// numParts - return the total parts to be split, e.g. if the region spans 4
10241084
// GRFs, it needs splitting into 2 parts at least.
10251085
bool CEncoder::NeedSplitting(CVariable* var, const SModifier& mod,
1026-
unsigned& numParts,
1027-
bool isSource) const {
1086+
unsigned& numParts, bool isSource) const
1087+
{
10281088
// If nothing is specified, don't split.
10291089
if (!var)
1090+
{
10301091
return false;
1092+
}
1093+
10311094
// Only handle SIMD16 now! We assume all data movements in SIMD8 will honor
10321095
// the region rules.
10331096
Common_ISA_Exec_Size simdSize = GetAluExecSize(var);
1034-
switch (simdSize) {
1035-
default:
1036-
return false;
1097+
const unsigned elemSize = var->GetElemSize();
1098+
1099+
switch (simdSize)
1100+
{
10371101
case EXEC_SIZE_16:
10381102
break;
1039-
// NOTE that SIMD32 will be supported differently based on the current
1040-
// implementation!
1103+
default:
1104+
{
1105+
// Checks for some rare cases that are not handled by the splitter, but should be detected and reported.
1106+
// Example: mov (8|M0) r4.0<1>:q r31.0<2;1,0>:q
1107+
constexpr unsigned maxBlockSize = 64; // size of 2 GRFs in bytes
1108+
// For uniform variables (which implies simdSize==1) the emitter may set regions with width>1.
1109+
// As it may happen in various places, we detect it here.
1110+
assert(var->IsUniform() || GrfRegionSize(simdSize, elemSize, mod, isSource) <= maxBlockSize);
1111+
return false;
1112+
}
10411113
}
10421114

10431115
// Only general variables need splitting so far.
10441116
if (var->GetVarType() != EVARTYPE_GENERAL)
1117+
{
10451118
return false;
1119+
}
10461120

10471121
// Only varying variable need splitting so far.
10481122
// NOTE: uniform variable is assumed to take less than 2 GRF+.
10491123
if (var->IsUniform())
1124+
{
10501125
return false;
1126+
}
10511127

1052-
unsigned elemSize = var->GetElemSize();
10531128
// We assume there is no 2 GRF crossing when element size is smaller than
10541129
// 4 bytes (or 32 bits), e.g. 16-bit WORD.
10551130
if (elemSize < 4)
1131+
{
10561132
return false;
1133+
}
1134+
10571135
// If the data type has more than 4 bytes, i.e. 32 bits, it already crosses
10581136
// 2+ GRFs by itself. There's no need to check further.
10591137
if (elemSize > 4)
@@ -1062,8 +1140,10 @@ namespace IGC
10621140
assert((isSource || !mod.specialRegion) &&
10631141
"It's expected that there's no special region associated with "
10641142
"QWORD type destination!");
1065-
if (isSource && mod.specialRegion) {
1066-
if (mod.region[1] == 1 && mod.region[0] == 0) {
1143+
if (isSource && mod.specialRegion)
1144+
{
1145+
if (mod.region[1] == 1 && mod.region[0] == 0)
1146+
{
10671147
// src region is <0;1,x>, can't cross 2 GRF. No need to split.
10681148
return false;
10691149
}
@@ -1072,29 +1152,43 @@ namespace IGC
10721152

10731153
numParts = std::max(numParts, 2U);
10741154
return true;
1155+
10751156
}
10761157

10771158

10781159
// For 32-bit data types, without special region, they won't cross 2+ GRFs.
10791160
if (!mod.specialRegion)
1161+
{
10801162
return false;
1163+
}
10811164

10821165
// Check regioning.
1083-
if (isSource) {
1166+
if (isSource)
1167+
{
10841168
// FIXME: Need better support for region with non-1 width.
10851169
if (mod.region[1] != 1)
1170+
{
10861171
return false;
1172+
}
1173+
10871174
if (mod.region[0] < 2)
1175+
{
10881176
return false;
1177+
}
1178+
10891179
// For src with width set to 1, region with > 1 vstride needs
10901180
// splitting.
10911181
numParts = std::max(numParts, unsigned(mod.region[0]));
10921182
return true;
10931183
}
1184+
10941185
if (mod.region[2] < 2)
1186+
{
10951187
return false;
1096-
numParts = std::max(numParts, unsigned(mod.region[2]));
1188+
}
1189+
10971190
// For dst, region with > 1 hstride needs splitting.
1191+
numParts = std::max(numParts, unsigned(mod.region[2]));
10981192
return true;
10991193
}
11001194

IGC/Compiler/CISACodeGen/CISABuilder.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -468,9 +468,10 @@ namespace IGC
468468
Common_ISA_Exec_Size GetAluExecSize(CVariable* dst) const;
469469
Common_VISA_EMask_Ctrl GetAluEMask(CVariable* dst);
470470
bool IsSat();
471+
472+
// Variable splitting facilities (if crosses 2 GRF boundary).
471473
bool NeedSplitting(CVariable* var, const SModifier& mod,
472-
unsigned& numParts,
473-
bool isSource = false) const;
474+
unsigned& numParts, bool isSource = false) const;
474475
SModifier SplitVariable(Common_ISA_Exec_Size fromExecSize,
475476
Common_ISA_Exec_Size toExecSize,
476477
unsigned thePart,

0 commit comments

Comments
 (0)