@@ -50,26 +50,28 @@ namespace
50
50
};
51
51
52
52
// Type* eltTy = V->getType()->getScalarType();
53
- const bool is64BitTy = false ; // (eltTy->getPrimitiveSizeInBits() > 32);
53
+ bool is64BitTy = false ; // (eltTy->getPrimitiveSizeInBits() > 32);
54
54
55
55
// GRF-aligned for send operands
56
56
// check if V is defined by send
57
- bool isSend = isa<LoadInst>(V);
57
+ if (isa<LoadInst>(V)) {
58
+ getSendPayloadAlignment (is64BitTy);
59
+ }
58
60
if (GenIntrinsicInst* CI = dyn_cast<GenIntrinsicInst>(V))
59
61
{
60
62
switch (CI->getIntrinsicID ()) {
61
63
case GenISAIntrinsic::GenISA_sub_group_dpas:
62
64
return grfAlignment ();
63
65
case GenISAIntrinsic::GenISA_simdBlockRead:
64
66
case GenISAIntrinsic::GenISA_LSC2DBlockRead:
65
- isSend = true ;
66
- break ;
67
+ return getSendPayloadAlignment (is64BitTy);
67
68
default :
68
69
break ;
69
70
}
70
71
}
71
72
72
73
// Check if V is used in send
74
+ bool isSend = false ;
73
75
for (auto UI = V->user_begin (), UE = V->user_end (); UI != UE; ++UI) {
74
76
User* U = *UI;
75
77
if (isa<LoadInst>(U) || isa<StoreInst>(U))
@@ -486,7 +488,7 @@ void VariableReuseAnalysis::postProcessing()
486
488
// m_LifetimeAtEndOfBB is used to keep track of it; for the latter,
487
489
// m_LifetimeAt1stDefOfBB is used.
488
490
ValueVectorTy AllVals;
489
- SmallVector<Value*, 8 > valInCC;
491
+ SmallVector<Value*, 16 > valInCC;
490
492
m_DeSSA->getAllValuesInCongruentClass (aliasee, valInCC);
491
493
AllVals.insert (AllVals.end (), valInCC.begin (), valInCC.end ());
492
494
@@ -529,7 +531,7 @@ void VariableReuseAnalysis::postProcessing()
529
531
dessaRootVisited[rootV] = 1 ;
530
532
531
533
ValueVectorTy AllVals;
532
- SmallVector<Value*, 8 > valInCC;
534
+ SmallVector<Value*, 16 > valInCC;
533
535
m_DeSSA->getAllValuesInCongruentClass (rootV, valInCC);
534
536
AllVals.insert (AllVals.end (), valInCC.begin (), valInCC.end ());
535
537
@@ -666,7 +668,7 @@ void VariableReuseAnalysis::visitExtractElementInst(ExtractElementInst& I)
666
668
}
667
669
668
670
// Valid vec alias and add it into alias map
669
- addVecAlias (EEI_nv, vec_nv, iIdx);
671
+ addVecAlias (EEI_nv, vec_nv, vecVal, iIdx);
670
672
671
673
// Mark this inst as noop inst
672
674
m_HasBecomeNoopInsts[EEI] = 1 ;
@@ -759,7 +761,8 @@ void VariableReuseAnalysis::dumpAlias() const
759
761
760
762
// Add alias Aliaser -> Aliasee[Idx]
761
763
void VariableReuseAnalysis::addVecAlias (
762
- Value* Aliaser, Value* Aliasee, int Idx, e_alignment AliaseeAlign)
764
+ Value* Aliaser, Value* Aliasee, Value* OrigBaseVec,
765
+ int Idx, e_alignment AliaseeAlign)
763
766
{
764
767
auto getLargerAlign = [](e_alignment A0, e_alignment A1) -> e_alignment {
765
768
if (A0 == EALIGN_AUTO)
@@ -779,7 +782,7 @@ void VariableReuseAnalysis::addVecAlias(
779
782
StartIx += SV->StartElementOffset ;
780
783
}
781
784
else {
782
- aliaseeBV = getOrCreateBaseVecDesc (Aliasee, AliaseeAlign);
785
+ aliaseeBV = getOrCreateBaseVecDesc (Aliasee, OrigBaseVec, AliaseeAlign);
783
786
}
784
787
// update align
785
788
aliaseeBV->Align = getLargerAlign (aliaseeBV->Align , AliaseeAlign);
@@ -840,10 +843,10 @@ SSubVecDesc* VariableReuseAnalysis::getOrCreateSubVecDesc(Value* V)
840
843
}
841
844
842
845
SBaseVecDesc* VariableReuseAnalysis::getOrCreateBaseVecDesc (Value* V,
843
- e_alignment A)
846
+ Value* OV, e_alignment A)
844
847
{
845
848
if (m_baseVecMap.count (V) == 0 ) {
846
- SBaseVecDesc* BV = new (Allocator) SBaseVecDesc (V, A);
849
+ SBaseVecDesc* BV = new (Allocator) SBaseVecDesc (V, OV, A);
847
850
m_baseVecMap.insert (std::make_pair (V, BV));
848
851
}
849
852
return m_baseVecMap[V];
@@ -1006,6 +1009,14 @@ Value* VariableReuseAnalysis::traceAliasValue(Value* V)
1006
1009
{
1007
1010
if (CastInst * CastI = dyn_cast_or_null<CastInst>(V))
1008
1011
{
1012
+ // Only handle Noop cast inst. For example,
1013
+ // dst = bitcast <3 x i32> src to <3 x float>,
1014
+ // it is okay, but the following isn't.
1015
+ // dst = bitcast <3 x i64> src to <6 x i32>
1016
+ if (!isNoOpInst (CastI, m_pCtx)) {
1017
+ return V;
1018
+ }
1019
+
1009
1020
Value* Src = CastI->getOperand (0 );
1010
1021
if (isa<Constant>(Src))
1011
1022
return CastI;
@@ -1198,7 +1209,7 @@ bool VariableReuseAnalysis::processExtractFrom(VecInsEltInfoTy& AllIEIs)
1198
1209
}
1199
1210
1200
1211
e_alignment BaseAlign;
1201
- if (!checkSubAlign (BaseAlign, Sub_nv, Base_nv , BaseStartIx)) {
1212
+ if (!checkSubAlign (BaseAlign, Sub, BaseVec , BaseStartIx)) {
1202
1213
return false ;
1203
1214
}
1204
1215
@@ -1230,7 +1241,7 @@ bool VariableReuseAnalysis::processExtractFrom(VecInsEltInfoTy& AllIEIs)
1230
1241
}
1231
1242
1232
1243
// add alias
1233
- addVecAlias (Sub_nv, Base_nv, BaseStartIx, BaseAlign);
1244
+ addVecAlias (Sub_nv, Base_nv, BaseVec, BaseStartIx, BaseAlign);
1234
1245
1235
1246
// Make sure noop insts are in the map.
1236
1247
for (int i = 0 , sz = nelts; i < sz; ++i)
@@ -1246,7 +1257,7 @@ bool VariableReuseAnalysis::processExtractFrom(VecInsEltInfoTy& AllIEIs)
1246
1257
if (!m_DeSSA->isNoopAliaser (EEI)) {
1247
1258
// Set EEI as an aliaser, thus it becomes a noop.
1248
1259
Value *EEI_nv = m_DeSSA->getNodeValue (EEI);
1249
- addVecAlias (EEI_nv, Base_nv, AllIEIs[i].FromVec_eltIx , EALIGN_AUTO);
1260
+ addVecAlias (EEI_nv, Base_nv, BaseVec, AllIEIs[i].FromVec_eltIx , EALIGN_AUTO);
1250
1261
m_HasBecomeNoopInsts[EEI] = 1 ;
1251
1262
}
1252
1263
}
@@ -1378,7 +1389,7 @@ bool VariableReuseAnalysis::processInsertTo(VecInsEltInfoTy& AllIEIs)
1378
1389
continue ;
1379
1390
}
1380
1391
}
1381
- addVecAlias (V_nv, Base_nv, V_ix, BaseAlign);
1392
+ addVecAlias (V_nv, Base_nv, FirstIEI, V_ix, BaseAlign);
1382
1393
1383
1394
int V_sz = getNumElts (V);
1384
1395
if (V_sz > 1 )
@@ -1399,7 +1410,7 @@ bool VariableReuseAnalysis::processInsertTo(VecInsEltInfoTy& AllIEIs)
1399
1410
if (!m_DeSSA->isNoopAliaser (EEI)) {
1400
1411
// EEI should be in alias map so it can be marked as noop
1401
1412
Value *EEI_nv = m_DeSSA->getNodeValue (EEI);
1402
- addVecAlias (EEI_nv, Base_nv, j);
1413
+ addVecAlias (EEI_nv, Base_nv, FirstIEI, j);
1403
1414
m_HasBecomeNoopInsts[EEI] = 1 ;
1404
1415
}
1405
1416
}
@@ -1482,8 +1493,13 @@ bool VariableReuseAnalysis::aliasInterfere(Value* Sub, Value* Base, int BaseIdx)
1482
1493
bool VariableReuseAnalysis::isCandidateUse (Value* V) const
1483
1494
{
1484
1495
for (User* U : V->users ()) {
1485
- if (GenIntrinsicInst* CI = dyn_cast<GenIntrinsicInst>(U)) {
1486
- switch (CI->getIntrinsicID ()) {
1496
+ Value* V = U;
1497
+ CastInst* CI = dyn_cast<CastInst>(V);
1498
+ if (CI && isNoOpInst (CI, m_pCtx)) {
1499
+ V = CI->getOperand (0 );
1500
+ }
1501
+ if (GenIntrinsicInst* GII = dyn_cast<GenIntrinsicInst>(V)) {
1502
+ switch (GII->getIntrinsicID ()) {
1487
1503
case GenISAIntrinsic::GenISA_sub_group_dpas:
1488
1504
case GenISAIntrinsic::GenISA_LSC2DBlockWrite:
1489
1505
case GenISAIntrinsic::GenISA_simdBlockWrite:
@@ -1492,7 +1508,7 @@ bool VariableReuseAnalysis::isCandidateUse(Value* V) const
1492
1508
break ;
1493
1509
}
1494
1510
}
1495
- else if (StoreInst* SI = dyn_cast<StoreInst>(U )) {
1511
+ else if (StoreInst* SI = dyn_cast<StoreInst>(V )) {
1496
1512
return true ;
1497
1513
}
1498
1514
}
@@ -1502,8 +1518,13 @@ bool VariableReuseAnalysis::isCandidateUse(Value* V) const
1502
1518
// Check if a value is defined by instructions that we handle.
1503
1519
bool VariableReuseAnalysis::isCandidateDef (Value* V) const
1504
1520
{
1505
- if (GenIntrinsicInst* CI = dyn_cast<GenIntrinsicInst>(V)) {
1506
- switch (CI->getIntrinsicID ()) {
1521
+ Value* Val = V;
1522
+ CastInst* CI = dyn_cast<CastInst>(Val);
1523
+ if (CI && isNoOpInst (CI, m_pCtx)) {
1524
+ Val = CI->getOperand (0 );
1525
+ }
1526
+ if (GenIntrinsicInst* GII = dyn_cast<GenIntrinsicInst>(Val)) {
1527
+ switch (GII->getIntrinsicID ()) {
1507
1528
case GenISAIntrinsic::GenISA_sub_group_dpas:
1508
1529
case GenISAIntrinsic::GenISA_LSC2DBlockWrite:
1509
1530
case GenISAIntrinsic::GenISA_simdBlockWrite:
@@ -1512,19 +1533,24 @@ bool VariableReuseAnalysis::isCandidateDef(Value* V) const
1512
1533
break ;
1513
1534
}
1514
1535
}
1515
- else if (StoreInst* SI = dyn_cast<StoreInst>(V )) {
1536
+ else if (StoreInst* SI = dyn_cast<StoreInst>(Val )) {
1516
1537
return true ;
1517
1538
}
1518
1539
return false ;
1519
1540
}
1520
1541
1521
- // Given node values Subvec_nd and Basevec_nd. This function checks
1522
- // if Subvec can be a sub-vector of Basevec_nd at base idex Base_ix.
1523
- // If so, return true, and set the correct alignment requirement
1524
- // to BaseAlign.
1525
- bool VariableReuseAnalysis::checkSubAlign (e_alignment& BaseAlign,
1526
- Value* Subvec_nd, Value* Basevec_nd, int Base_ix)
1542
+ // Check if SubVec is aligned if it becomes a sub-vector at Base_ix of
1543
+ // BaseVec. If so, return true with SubVec alignment in BaseAlign.
1544
+ bool VariableReuseAnalysis::checkSubAlign (e_alignment& BaseAlign,
1545
+ Value* SubVec, Value* BaseVec, int Base_ix)
1527
1546
{
1547
+ auto maxAlign = [](e_alignment A, e_alignment B) {
1548
+ if (A == EALIGN_AUTO)
1549
+ return B;
1550
+ if (B == EALIGN_AUTO)
1551
+ return A;
1552
+ return A > B ? A : B;
1553
+ };
1528
1554
1529
1555
auto toBytes = [](e_alignment A) {
1530
1556
switch (A) {
@@ -1542,11 +1568,21 @@ bool VariableReuseAnalysis::checkSubAlign (e_alignment& BaseAlign,
1542
1568
};
1543
1569
1544
1570
BaseAlign = EALIGN_AUTO;
1545
- Type* eltTy = Basevec_nd->getType ()->getScalarType ();
1546
- uint32_t eltBytes = ((uint32_t )eltTy->getPrimitiveSizeInBits () / 8 );
1547
- Type* sEltTy = Subvec_nd->getType ()->getScalarType ();
1548
- IGC_ASSERT (eltBytes == ((uint32_t )sEltTy ->getPrimitiveSizeInBits () / 8 ));
1549
- e_alignment sub_align = getMinAlignment (Subvec_nd, m_WIA, m_pCtx);
1571
+
1572
+ // Get element bytes from original base vector
1573
+ Type* eltTy = BaseVec->getType ()->getScalarType ();
1574
+ uint32_t eltBytes = (uint32_t )m_DL->getTypeStoreSize (eltTy);
1575
+
1576
+ // get all coalesced values for subvec and find the max alignment
1577
+ SmallVector<Value*, 16 > allVals;
1578
+ m_DeSSA->getAllCoalescedValues (SubVec, allVals);
1579
+
1580
+ e_alignment sub_align = EALIGN_AUTO;
1581
+ for (auto II : allVals) {
1582
+ Value* V = II;
1583
+ e_alignment thisAlign = getMinAlignment (V, m_WIA, m_pCtx);
1584
+ sub_align = maxAlign (sub_align, thisAlign);
1585
+ }
1550
1586
int sub_alignBytes = toBytes (sub_align);
1551
1587
if (sub_alignBytes == 0 ) {
1552
1588
// AUTO align is fine.
@@ -1555,10 +1591,12 @@ bool VariableReuseAnalysis::checkSubAlign (e_alignment& BaseAlign,
1555
1591
1556
1592
// m_SimdSize is unavailable, using smallest simdsize for now.
1557
1593
int simdsize = numLanes (m_pCtx->platform .getMinDispatchMode ());
1558
- int uLanes = (m_WIA->isUniform (Basevec_nd ) ? 1 : simdsize);
1594
+ int uLanes = (m_WIA->isUniform (BaseVec ) ? 1 : simdsize);
1559
1595
// If base is an aliaser at this time, must check its aliasee
1596
+
1597
+ Value* BaseVec_nd = m_DeSSA->getNodeValue (BaseVec);
1560
1598
int ix1 = 0 ;
1561
- auto MII = m_aliasMap.find (Basevec_nd );
1599
+ auto MII = m_aliasMap.find (BaseVec_nd );
1562
1600
if (MII != m_aliasMap.end ()) {
1563
1601
SSubVecDesc* SV = MII->second ;
1564
1602
ix1 = SV->StartElementOffset ;
0 commit comments