@@ -59,7 +59,6 @@ using namespace llvm;
59
59
60
60
namespace IGC
61
61
{
62
-
63
62
Common_ISA_Exec_Size getExecSize (SIMDMode width)
64
63
{
65
64
switch (width)
@@ -204,6 +203,67 @@ namespace IGC
204
203
return static_cast <Common_ISA_SVM_Block_Num>(~0U );
205
204
}
206
205
206
+ constexpr unsigned visaNumLanes (Common_ISA_Exec_Size execSize)
207
+ {
208
+ unsigned lanes = 0 ;
209
+ switch (execSize)
210
+ {
211
+ case EXEC_SIZE_1: lanes = 1 ; break ;
212
+ case EXEC_SIZE_2: lanes = 2 ; break ;
213
+ case EXEC_SIZE_4: lanes = 4 ; break ;
214
+ case EXEC_SIZE_8: lanes = 8 ; break ;
215
+ case EXEC_SIZE_16: lanes = 16 ; break ;
216
+ case EXEC_SIZE_32: lanes = 32 ; break ;
217
+ default : assert (false );
218
+ }
219
+ return lanes;
220
+ }
221
+
222
+ // Take certain attributes of either src or dst instruction operand and return the size
223
+ // of the associated grf region, accessed during instruction's execution, in bytes.
224
+ // If aligned==true, the size includes length of data block starting at the beginning of grf
225
+ // and ending at the subReg; this is useful to check if the region crosses 2 grf boundary.
226
+ // If special region attribute is not set, the regioning is <1; 1, 0> for src and <1> for dst.
227
+ // Note that the assertions may hit in certain cases, which should be handled separately,
228
+ // like uniform vars with operand with special region set.
229
+ constexpr unsigned GrfRegionSize (Common_ISA_Exec_Size execSize, unsigned elementSize,
230
+ const SModifier& mod, bool isSource, bool aligned = true )
231
+ {
232
+ constexpr unsigned grfSize = 32 ; // in bytes
233
+ // If subReg is big enough to cross grf boundary, adjust it.
234
+ const unsigned base = (mod.subReg * elementSize) % grfSize;
235
+ unsigned lastInRegion = aligned ? base : 0 ;
236
+ if (isSource)
237
+ {
238
+ // Formula based on algorithm provided in the spec (see Region Parameters)
239
+ const unsigned vstride = mod.specialRegion ? mod.region [0 ] : 1 ;
240
+ const unsigned width = mod.specialRegion ? mod.region [1 ] : 1 ;
241
+ const unsigned hstride = mod.specialRegion ? mod.region [2 ] : 0 ;
242
+ assert (width != 0 );
243
+ const unsigned height = visaNumLanes (execSize) / width;
244
+ assert (height != 0 );
245
+ lastInRegion += (height - 1 ) * vstride * elementSize +
246
+ (width - 1 ) * hstride * elementSize;
247
+ }
248
+ else
249
+ {
250
+ const unsigned hstride = mod.specialRegion ? mod.region [2 ] : 1 ;
251
+ lastInRegion += (visaNumLanes (execSize) - 1 ) * hstride * elementSize;
252
+ }
253
+ return lastInRegion + elementSize;
254
+ };
255
+ // Compile-time ULTs for GrfRegionSize()
256
+ static_assert (GrfRegionSize(EXEC_SIZE_16, 4 , SModifier{}, false ) == 64 &&
257
+ GrfRegionSize (EXEC_SIZE_16, 4 , SModifier{ 16 , {}, {0 ,0 ,2 }, {}, {}, true }, false ) == 124 &&
258
+ GrfRegionSize (EXEC_SIZE_16, 4 , SModifier{ 15 , {}, {0 ,0 ,2 }, {}, {}, true }, false ) == 124 + 7 *4 &&
259
+ GrfRegionSize (EXEC_SIZE_8, 8 , SModifier{ 1 , {}, {0 ,0 ,2 }, {}, {}, true }, false ) == 128 ,
260
+ " GrfRegionSize compile-time test failed - dst." );
261
+ static_assert (GrfRegionSize(EXEC_SIZE_16, 4 , SModifier{}, true ) == 64 &&
262
+ GrfRegionSize (EXEC_SIZE_16, 4 , SModifier{ {}, {}, {4 ,4 ,0 }, {}, {}, true }, true ) == 52 &&
263
+ GrfRegionSize (EXEC_SIZE_8, 8 , SModifier{ 8 , {}, {2 ,1 ,0 }, {}, {}, true }, true ) == 120 &&
264
+ GrfRegionSize (EXEC_SIZE_8, 8 , SModifier{ 10 , {}, {2 ,1 ,0 }, {}, {}, true }, true ) == 120 + 2 *8 ,
265
+ " GrfRegionSize compile-time test failed - src." );
266
+
207
267
// split a SIMD16 variable into two SIMD8 while satisfying vISA's raw operand alignment
208
268
// return a tuple representing the vISA raw operand (var + offset) after split
209
269
std::tuple<CVariable*, uint32_t > CEncoder::splitRawOperand (CVariable* var, bool isFirstHalf,
@@ -1023,37 +1083,55 @@ namespace IGC
1023
1083
// numParts - return the total parts to be split, e.g. if the region spans 4
1024
1084
// GRFs, it needs splitting into 2 parts at least.
1025
1085
bool CEncoder::NeedSplitting (CVariable* var, const SModifier& mod,
1026
- unsigned & numParts,
1027
- bool isSource) const {
1086
+ unsigned & numParts, bool isSource) const
1087
+ {
1028
1088
// If nothing is specified, don't split.
1029
1089
if (!var)
1090
+ {
1030
1091
return false ;
1092
+ }
1093
+
1031
1094
// Only handle SIMD16 now! We assume all data movements in SIMD8 will honor
1032
1095
// the region rules.
1033
1096
Common_ISA_Exec_Size simdSize = GetAluExecSize (var);
1034
- switch (simdSize) {
1035
- default :
1036
- return false ;
1097
+ const unsigned elemSize = var->GetElemSize ();
1098
+
1099
+ switch (simdSize)
1100
+ {
1037
1101
case EXEC_SIZE_16:
1038
1102
break ;
1039
- // NOTE that SIMD32 will be supported differently based on the current
1040
- // implementation!
1103
+ default :
1104
+ {
1105
+ // Checks for some rare cases that are not handled by the splitter, but should be detected and reported.
1106
+ // Example: mov (8|M0) r4.0<1>:q r31.0<2;1,0>:q
1107
+ constexpr unsigned maxBlockSize = 64 ; // size of 2 GRFs in bytes
1108
+ // For uniform variables (which implies simdSize==1) the emitter may set regions with width>1.
1109
+ // As it may happen in various places, we detect it here.
1110
+ assert (var->IsUniform () || GrfRegionSize (simdSize, elemSize, mod, isSource) <= maxBlockSize);
1111
+ return false ;
1112
+ }
1041
1113
}
1042
1114
1043
1115
// Only general variables need splitting so far.
1044
1116
if (var->GetVarType () != EVARTYPE_GENERAL)
1117
+ {
1045
1118
return false ;
1119
+ }
1046
1120
1047
1121
// Only varying variable need splitting so far.
1048
1122
// NOTE: uniform variable is assumed to take less than 2 GRF+.
1049
1123
if (var->IsUniform ())
1124
+ {
1050
1125
return false ;
1126
+ }
1051
1127
1052
- unsigned elemSize = var->GetElemSize ();
1053
1128
// We assume there is no 2 GRF crossing when element size is smaller than
1054
1129
// 4 bytes (or 32 bits), e.g. 16-bit WORD.
1055
1130
if (elemSize < 4 )
1131
+ {
1056
1132
return false ;
1133
+ }
1134
+
1057
1135
// If the data type has more than 4 bytes, i.e. 32 bits, it already crosses
1058
1136
// 2+ GRFs by itself. There's no need to check further.
1059
1137
if (elemSize > 4 )
@@ -1062,8 +1140,10 @@ namespace IGC
1062
1140
assert ((isSource || !mod.specialRegion ) &&
1063
1141
" It's expected that there's no special region associated with "
1064
1142
" QWORD type destination!" );
1065
- if (isSource && mod.specialRegion ) {
1066
- if (mod.region [1 ] == 1 && mod.region [0 ] == 0 ) {
1143
+ if (isSource && mod.specialRegion )
1144
+ {
1145
+ if (mod.region [1 ] == 1 && mod.region [0 ] == 0 )
1146
+ {
1067
1147
// src region is <0;1,x>, can't cross 2 GRF. No need to split.
1068
1148
return false ;
1069
1149
}
@@ -1072,29 +1152,43 @@ namespace IGC
1072
1152
1073
1153
numParts = std::max (numParts, 2U );
1074
1154
return true ;
1155
+
1075
1156
}
1076
1157
1077
1158
1078
1159
// For 32-bit data types, without special region, they won't cross 2+ GRFs.
1079
1160
if (!mod.specialRegion )
1161
+ {
1080
1162
return false ;
1163
+ }
1081
1164
1082
1165
// Check regioning.
1083
- if (isSource) {
1166
+ if (isSource)
1167
+ {
1084
1168
// FIXME: Need better support for region with non-1 width.
1085
1169
if (mod.region [1 ] != 1 )
1170
+ {
1086
1171
return false ;
1172
+ }
1173
+
1087
1174
if (mod.region [0 ] < 2 )
1175
+ {
1088
1176
return false ;
1177
+ }
1178
+
1089
1179
// For src with width set to 1, region with > 1 vstride needs
1090
1180
// splitting.
1091
1181
numParts = std::max (numParts, unsigned (mod.region [0 ]));
1092
1182
return true ;
1093
1183
}
1184
+
1094
1185
if (mod.region [2 ] < 2 )
1186
+ {
1095
1187
return false ;
1096
- numParts = std::max (numParts, unsigned (mod.region [2 ]));
1188
+ }
1189
+
1097
1190
// For dst, region with > 1 hstride needs splitting.
1191
+ numParts = std::max (numParts, unsigned (mod.region [2 ]));
1098
1192
return true ;
1099
1193
}
1100
1194
0 commit comments