Skip to content

Commit 5103715

Browse files
jgu222igcbot
authored andcommitted
Make sure aliased vectors have the same element size
Vector aliasing uses a node value as the ID for a group of aliased values. Because two vectors of different sizes can be aliased to each other, the node value may differ from the original vector and thus have a different element size than the original vector, which would cause incorrect offset calculation. This change fixes that by adding the type of the original base vector to the base vector struct. In addition, the previous alignment-checking code for subvectors was incomplete. This change re-implements it by getting all coalesced values, checking the alignment of each one, and selecting the maximum of them.
1 parent f59947c commit 5103715

File tree

5 files changed

+146
-53
lines changed

5 files changed

+146
-53
lines changed

IGC/Compiler/CISACodeGen/CShader.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -509,8 +509,8 @@ void CShader::CreateAliasVars()
509509
(void) GetSymbol(rV, false, BV->Align);
510510
}
511511
CVariable* rootCVar = GetSymbol(baseVal);
512-
Type* bEltTy = baseVal->getType()->getScalarType();
513-
const uint32_t bEltBytes = (int)m_DL->getTypeStoreSize(bEltTy);
512+
Type* eltTy = BV->OrigType->getScalarType();
513+
uint32_t bEltBytes = (uint32_t)m_DL->getTypeStoreSize(eltTy);
514514

515515
// Generate all vector aliasers and their
516516
// dessa root if any.

IGC/Compiler/CISACodeGen/DeSSA.cpp

Lines changed: 47 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1317,14 +1317,13 @@ Value* DeSSA::getRootValue(Value* Val, e_alignment* pAlign) const
13171317

13181318
void DeSSA::getAllValuesInCongruentClass(
13191319
Value* V,
1320-
SmallVector<Value*, 8> & ValsInCC)
1320+
SmallVector<Value*, 16> & ValsInCC)
13211321
{
13221322
// Handle InsertElement specially. Note that only rootValue from
13231323
// a sequence of insertElement is in congruent class. The RootValue
13241324
// has its liveness modified to cover all InsertElements that are
13251325
// grouped together.
1326-
Value* RootV = nullptr;
1327-
RootV = getNodeValue(V);
1326+
Value* RootV = getNodeValue(V);
13281327

13291328
IGC_ASSERT_MESSAGE(nullptr != RootV, "ICE: Node value should not be nullptr!");
13301329
ValsInCC.push_back(RootV);
@@ -1339,6 +1338,47 @@ void DeSSA::getAllValuesInCongruentClass(
13391338
return;
13401339
}
13411340

1341+
// All values that are coalesced together, including values that are
1342+
// handled specially, such as ones in aliasMap and insEltMap.
1343+
void DeSSA::getAllCoalescedValues(
1344+
Value* V,
1345+
SmallVector<Value*, 16>& Vals)
1346+
{
1347+
getAllValuesInCongruentClass(V, Vals);
1348+
IGC_ASSERT_MESSAGE(Vals.size() > 0, "ICE: Vals should not be empty!");
1349+
1350+
// First, add values from InsEltMap
1351+
for (int i = 0, sz = (int)Vals.size(); i < sz; ++i) {
1352+
Value* ccVal = Vals[i];
1353+
for (auto II : InsEltMap) {
1354+
Value* R = II.second;
1355+
if (R != ccVal) {
1356+
continue;
1357+
}
1358+
Value* A = II.first;
1359+
if (A != R) {
1360+
Vals.push_back(A);
1361+
}
1362+
}
1363+
}
1364+
1365+
// second, add aliasers from AliasMap
1366+
for (int i = 0, sz = (int)Vals.size(); i < sz; ++i) {
1367+
Value* aliasee = Vals[i];
1368+
for (auto II : AliasMap) {
1369+
Value* R = II.second;
1370+
if (R != aliasee) {
1371+
continue;
1372+
}
1373+
Value* aliaser = II.first;
1374+
if (aliaser != aliasee) {
1375+
Vals.push_back(aliaser);
1376+
}
1377+
}
1378+
}
1379+
return;
1380+
}
1381+
13421382
void DeSSA::coalesceAliasInsertValue(InsertValueInst* theIVI)
13431383
{
13441384
// Find a chain of insertvalue, and return the lead (last one).
@@ -1672,8 +1712,8 @@ bool DeSSA::isSingleValued(llvm::Value* V) const
16721712
// and sort congruent classes before doing interference checking.
16731713
bool DeSSA::interfere(llvm::Value* V0, llvm::Value* V1)
16741714
{
1675-
SmallVector<Value*, 8> allCC0;
1676-
SmallVector<Value*, 8> allCC1;
1715+
SmallVector<Value*, 16> allCC0;
1716+
SmallVector<Value*, 16> allCC1;
16771717
getAllValuesInCongruentClass(V0, allCC0);
16781718
getAllValuesInCongruentClass(V1, allCC1);
16791719

@@ -1700,8 +1740,8 @@ bool DeSSA::interfere(llvm::Value* V0, llvm::Value* V1)
17001740
// with V0 and V1 interference ignored.
17011741
bool DeSSA::aliasInterfere(llvm::Value* V0, llvm::Value* V1)
17021742
{
1703-
SmallVector<Value*, 8> allCC0;
1704-
SmallVector<Value*, 8> allCC1;
1743+
SmallVector<Value*, 16> allCC0;
1744+
SmallVector<Value*, 16> allCC1;
17051745
getAllValuesInCongruentClass(V0, allCC0);
17061746
getAllValuesInCongruentClass(V1, allCC1);
17071747

IGC/Compiler/CISACodeGen/DeSSA.hpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,12 @@ namespace IGC {
100100

101101
void getAllValuesInCongruentClass(
102102
llvm::Value* V,
103-
llvm::SmallVector<llvm::Value*, 8> & ValsInCC);
103+
llvm::SmallVector<llvm::Value*, 16> & ValsInCC);
104+
105+
// getAllValuesInCongruentClass() + aliasMap + insEltMap, etc.
106+
void getAllCoalescedValues(
107+
llvm::Value* V,
108+
llvm::SmallVector<llvm::Value*, 16>& Vals);
104109

105110
/// print - print partitions in human readable form
106111
virtual void print(llvm::raw_ostream& OS, const llvm::Module* = 0) const override;

IGC/Compiler/CISACodeGen/VariableReuseAnalysis.cpp

Lines changed: 73 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -50,26 +50,28 @@ namespace
5050
};
5151

5252
//Type* eltTy = V->getType()->getScalarType();
53-
const bool is64BitTy = false; // (eltTy->getPrimitiveSizeInBits() > 32);
53+
bool is64BitTy = false; // (eltTy->getPrimitiveSizeInBits() > 32);
5454

5555
// GRF-aligned for send operands
5656
// check if V is defined by send
57-
bool isSend = isa<LoadInst>(V);
57+
if (isa<LoadInst>(V)) {
58+
getSendPayloadAlignment(is64BitTy);
59+
}
5860
if (GenIntrinsicInst* CI = dyn_cast<GenIntrinsicInst>(V))
5961
{
6062
switch (CI->getIntrinsicID()) {
6163
case GenISAIntrinsic::GenISA_sub_group_dpas:
6264
return grfAlignment();
6365
case GenISAIntrinsic::GenISA_simdBlockRead:
6466
case GenISAIntrinsic::GenISA_LSC2DBlockRead:
65-
isSend = true;
66-
break;
67+
return getSendPayloadAlignment(is64BitTy);
6768
default:
6869
break;
6970
}
7071
}
7172

7273
// Check if V is used in send
74+
bool isSend = false;
7375
for (auto UI = V->user_begin(), UE = V->user_end(); UI != UE; ++UI) {
7476
User* U = *UI;
7577
if (isa<LoadInst>(U) || isa<StoreInst>(U))
@@ -486,7 +488,7 @@ void VariableReuseAnalysis::postProcessing()
486488
// m_LifetimeAtEndOfBB is used to keep track of it; for the latter,
487489
// m_LifetimeAt1stDefOfBB is used.
488490
ValueVectorTy AllVals;
489-
SmallVector<Value*, 8> valInCC;
491+
SmallVector<Value*, 16> valInCC;
490492
m_DeSSA->getAllValuesInCongruentClass(aliasee, valInCC);
491493
AllVals.insert(AllVals.end(), valInCC.begin(), valInCC.end());
492494

@@ -529,7 +531,7 @@ void VariableReuseAnalysis::postProcessing()
529531
dessaRootVisited[rootV] = 1;
530532

531533
ValueVectorTy AllVals;
532-
SmallVector<Value*, 8> valInCC;
534+
SmallVector<Value*, 16> valInCC;
533535
m_DeSSA->getAllValuesInCongruentClass(rootV, valInCC);
534536
AllVals.insert(AllVals.end(), valInCC.begin(), valInCC.end());
535537

@@ -666,7 +668,7 @@ void VariableReuseAnalysis::visitExtractElementInst(ExtractElementInst& I)
666668
}
667669

668670
// Valid vec alias and add it into alias map
669-
addVecAlias(EEI_nv, vec_nv, iIdx);
671+
addVecAlias(EEI_nv, vec_nv, vecVal, iIdx);
670672

671673
// Mark this inst as noop inst
672674
m_HasBecomeNoopInsts[EEI] = 1;
@@ -759,7 +761,8 @@ void VariableReuseAnalysis::dumpAlias() const
759761

760762
// Add alias Aliaser -> Aliasee[Idx]
761763
void VariableReuseAnalysis::addVecAlias(
762-
Value* Aliaser, Value* Aliasee, int Idx, e_alignment AliaseeAlign)
764+
Value* Aliaser, Value* Aliasee, Value* OrigBaseVec,
765+
int Idx, e_alignment AliaseeAlign)
763766
{
764767
auto getLargerAlign = [](e_alignment A0, e_alignment A1) -> e_alignment {
765768
if (A0 == EALIGN_AUTO)
@@ -779,7 +782,7 @@ void VariableReuseAnalysis::addVecAlias(
779782
StartIx += SV->StartElementOffset;
780783
}
781784
else {
782-
aliaseeBV = getOrCreateBaseVecDesc(Aliasee, AliaseeAlign);
785+
aliaseeBV = getOrCreateBaseVecDesc(Aliasee, OrigBaseVec, AliaseeAlign);
783786
}
784787
// update align
785788
aliaseeBV->Align = getLargerAlign(aliaseeBV->Align, AliaseeAlign);
@@ -840,10 +843,10 @@ SSubVecDesc* VariableReuseAnalysis::getOrCreateSubVecDesc(Value* V)
840843
}
841844

842845
SBaseVecDesc* VariableReuseAnalysis::getOrCreateBaseVecDesc(Value* V,
843-
e_alignment A)
846+
Value* OV, e_alignment A)
844847
{
845848
if (m_baseVecMap.count(V) == 0) {
846-
SBaseVecDesc* BV = new(Allocator) SBaseVecDesc(V, A);
849+
SBaseVecDesc* BV = new(Allocator) SBaseVecDesc(V, OV, A);
847850
m_baseVecMap.insert(std::make_pair(V, BV));
848851
}
849852
return m_baseVecMap[V];
@@ -1006,6 +1009,14 @@ Value* VariableReuseAnalysis::traceAliasValue(Value* V)
10061009
{
10071010
if (CastInst * CastI = dyn_cast_or_null<CastInst>(V))
10081011
{
1012+
// Only handle Noop cast inst. For example,
1013+
// dst = bitcast <3 x i32> src to <3 x float>,
1014+
// it is okay, but the following isn't.
1015+
// dst = bitcast <3 x i64> src to <6 x i32>
1016+
if (!isNoOpInst(CastI, m_pCtx)) {
1017+
return V;
1018+
}
1019+
10091020
Value* Src = CastI->getOperand(0);
10101021
if (isa<Constant>(Src))
10111022
return CastI;
@@ -1198,7 +1209,7 @@ bool VariableReuseAnalysis::processExtractFrom(VecInsEltInfoTy& AllIEIs)
11981209
}
11991210

12001211
e_alignment BaseAlign;
1201-
if (!checkSubAlign(BaseAlign, Sub_nv, Base_nv, BaseStartIx)) {
1212+
if (!checkSubAlign(BaseAlign, Sub, BaseVec, BaseStartIx)) {
12021213
return false;
12031214
}
12041215

@@ -1230,7 +1241,7 @@ bool VariableReuseAnalysis::processExtractFrom(VecInsEltInfoTy& AllIEIs)
12301241
}
12311242

12321243
// add alias
1233-
addVecAlias(Sub_nv, Base_nv, BaseStartIx, BaseAlign);
1244+
addVecAlias(Sub_nv, Base_nv, BaseVec, BaseStartIx, BaseAlign);
12341245

12351246
// Make sure noop insts are in the map.
12361247
for (int i = 0, sz = nelts; i < sz; ++i)
@@ -1246,7 +1257,7 @@ bool VariableReuseAnalysis::processExtractFrom(VecInsEltInfoTy& AllIEIs)
12461257
if (!m_DeSSA->isNoopAliaser(EEI)) {
12471258
// Set EEI as an aliser, thus it become noop.
12481259
Value *EEI_nv = m_DeSSA->getNodeValue(EEI);
1249-
addVecAlias(EEI_nv, Base_nv, AllIEIs[i].FromVec_eltIx, EALIGN_AUTO);
1260+
addVecAlias(EEI_nv, Base_nv, BaseVec, AllIEIs[i].FromVec_eltIx, EALIGN_AUTO);
12501261
m_HasBecomeNoopInsts[EEI] = 1;
12511262
}
12521263
}
@@ -1378,7 +1389,7 @@ bool VariableReuseAnalysis::processInsertTo(VecInsEltInfoTy& AllIEIs)
13781389
continue;
13791390
}
13801391
}
1381-
addVecAlias(V_nv, Base_nv, V_ix, BaseAlign);
1392+
addVecAlias(V_nv, Base_nv, FirstIEI, V_ix, BaseAlign);
13821393

13831394
int V_sz = getNumElts(V);
13841395
if (V_sz > 1)
@@ -1399,7 +1410,7 @@ bool VariableReuseAnalysis::processInsertTo(VecInsEltInfoTy& AllIEIs)
13991410
if (!m_DeSSA->isNoopAliaser(EEI)) {
14001411
// EEI should be in alias map so it can be marked as noop
14011412
Value *EEI_nv = m_DeSSA->getNodeValue(EEI);
1402-
addVecAlias(EEI_nv, Base_nv, j);
1413+
addVecAlias(EEI_nv, Base_nv, FirstIEI, j);
14031414
m_HasBecomeNoopInsts[EEI] = 1;
14041415
}
14051416
}
@@ -1482,8 +1493,13 @@ bool VariableReuseAnalysis::aliasInterfere(Value* Sub, Value* Base, int BaseIdx)
14821493
bool VariableReuseAnalysis::isCandidateUse(Value* V) const
14831494
{
14841495
for (User* U : V->users()) {
1485-
if (GenIntrinsicInst* CI = dyn_cast<GenIntrinsicInst>(U)) {
1486-
switch (CI->getIntrinsicID()) {
1496+
Value* V = U;
1497+
CastInst* CI = dyn_cast<CastInst>(V);
1498+
if (CI && isNoOpInst(CI, m_pCtx)) {
1499+
V = CI->getOperand(0);
1500+
}
1501+
if (GenIntrinsicInst* GII = dyn_cast<GenIntrinsicInst>(V)) {
1502+
switch (GII->getIntrinsicID()) {
14871503
case GenISAIntrinsic::GenISA_sub_group_dpas:
14881504
case GenISAIntrinsic::GenISA_LSC2DBlockWrite:
14891505
case GenISAIntrinsic::GenISA_simdBlockWrite:
@@ -1492,7 +1508,7 @@ bool VariableReuseAnalysis::isCandidateUse(Value* V) const
14921508
break;
14931509
}
14941510
}
1495-
else if (StoreInst* SI = dyn_cast<StoreInst>(U)) {
1511+
else if (StoreInst* SI = dyn_cast<StoreInst>(V)) {
14961512
return true;
14971513
}
14981514
}
@@ -1502,8 +1518,13 @@ bool VariableReuseAnalysis::isCandidateUse(Value* V) const
15021518
// Check if a value is defined by instructions that we handle.
15031519
bool VariableReuseAnalysis::isCandidateDef(Value* V) const
15041520
{
1505-
if (GenIntrinsicInst* CI = dyn_cast<GenIntrinsicInst>(V)) {
1506-
switch (CI->getIntrinsicID()) {
1521+
Value* Val = V;
1522+
CastInst* CI = dyn_cast<CastInst>(Val);
1523+
if (CI && isNoOpInst(CI, m_pCtx)) {
1524+
Val = CI->getOperand(0);
1525+
}
1526+
if (GenIntrinsicInst* GII = dyn_cast<GenIntrinsicInst>(Val)) {
1527+
switch (GII->getIntrinsicID()) {
15071528
case GenISAIntrinsic::GenISA_sub_group_dpas:
15081529
case GenISAIntrinsic::GenISA_LSC2DBlockWrite:
15091530
case GenISAIntrinsic::GenISA_simdBlockWrite:
@@ -1512,19 +1533,24 @@ bool VariableReuseAnalysis::isCandidateDef(Value* V) const
15121533
break;
15131534
}
15141535
}
1515-
else if (StoreInst* SI = dyn_cast<StoreInst>(V)) {
1536+
else if (StoreInst* SI = dyn_cast<StoreInst>(Val)) {
15161537
return true;
15171538
}
15181539
return false;
15191540
}
15201541

1521-
// Given node values Subvec_nd and Basevec_nd. This function checks
1522-
// if Subvec can be a sub-vector of Basevec_nd at base idex Base_ix.
1523-
// If so, return true, and set the correct alignment requirement
1524-
// to BaseAlign.
1525-
bool VariableReuseAnalysis::checkSubAlign (e_alignment& BaseAlign,
1526-
Value* Subvec_nd, Value* Basevec_nd, int Base_ix)
1542+
// Check if SubVec is aligned if it becomes a sub-vector at Base_ix of
1543+
// BaseVec. If so, return true with SubVec alignment in BaseAlign.
1544+
bool VariableReuseAnalysis::checkSubAlign(e_alignment& BaseAlign,
1545+
Value* SubVec, Value* BaseVec, int Base_ix)
15271546
{
1547+
auto maxAlign = [](e_alignment A, e_alignment B) {
1548+
if (A == EALIGN_AUTO)
1549+
return B;
1550+
if (B == EALIGN_AUTO)
1551+
return A;
1552+
return A > B ? A : B;
1553+
};
15281554

15291555
auto toBytes = [](e_alignment A) {
15301556
switch (A) {
@@ -1542,11 +1568,21 @@ bool VariableReuseAnalysis::checkSubAlign (e_alignment& BaseAlign,
15421568
};
15431569

15441570
BaseAlign = EALIGN_AUTO;
1545-
Type* eltTy = Basevec_nd->getType()->getScalarType();
1546-
uint32_t eltBytes = ((uint32_t)eltTy->getPrimitiveSizeInBits() / 8);
1547-
Type* sEltTy = Subvec_nd->getType()->getScalarType();
1548-
IGC_ASSERT(eltBytes == ((uint32_t)sEltTy->getPrimitiveSizeInBits() / 8));
1549-
e_alignment sub_align = getMinAlignment(Subvec_nd, m_WIA, m_pCtx);
1571+
1572+
// Get element bytes from original base vector
1573+
Type* eltTy = BaseVec->getType()->getScalarType();
1574+
uint32_t eltBytes = (uint32_t)m_DL->getTypeStoreSize(eltTy);
1575+
1576+
// get all coalesced values for subvec and find the max alignment
1577+
SmallVector<Value*, 16> allVals;
1578+
m_DeSSA->getAllCoalescedValues(SubVec, allVals);
1579+
1580+
e_alignment sub_align = EALIGN_AUTO;
1581+
for (auto II : allVals) {
1582+
Value* V = II;
1583+
e_alignment thisAlign = getMinAlignment(V, m_WIA, m_pCtx);
1584+
sub_align = maxAlign(sub_align, thisAlign);
1585+
}
15501586
int sub_alignBytes = toBytes(sub_align);
15511587
if (sub_alignBytes == 0) {
15521588
// AUTO align is fine.
@@ -1555,10 +1591,12 @@ bool VariableReuseAnalysis::checkSubAlign (e_alignment& BaseAlign,
15551591

15561592
// m_SimdSize is unavailable, using smallest simdsize for now.
15571593
int simdsize = numLanes(m_pCtx->platform.getMinDispatchMode());
1558-
int uLanes = (m_WIA->isUniform(Basevec_nd) ? 1 : simdsize);
1594+
int uLanes = (m_WIA->isUniform(BaseVec) ? 1 : simdsize);
15591595
// If base is an aliaser at this time, must check its aliasee
1596+
1597+
Value* BaseVec_nd = m_DeSSA->getNodeValue(BaseVec);
15601598
int ix1 = 0;
1561-
auto MII = m_aliasMap.find(Basevec_nd);
1599+
auto MII = m_aliasMap.find(BaseVec_nd);
15621600
if (MII != m_aliasMap.end()) {
15631601
SSubVecDesc* SV = MII->second;
15641602
ix1 = SV->StartElementOffset;

0 commit comments

Comments
 (0)