@@ -152,9 +152,8 @@ static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI) {
152
152
bool VectorCombine::vectorizeLoadInsert (Instruction &I) {
153
153
// Match insert into fixed vector of scalar value.
154
154
// TODO: Handle non-zero insert index.
155
- auto *Ty = dyn_cast<FixedVectorType>(I.getType ());
156
155
Value *Scalar;
157
- if (!Ty || ! match (&I, m_InsertElt (m_Undef (), m_Value (Scalar), m_ZeroInt ())) ||
156
+ if (!match (&I, m_InsertElt (m_Undef (), m_Value (Scalar), m_ZeroInt ())) ||
158
157
!Scalar->hasOneUse ())
159
158
return false ;
160
159
@@ -241,6 +240,7 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
241
240
// We assume this operation has no cost in codegen if there was no offset.
242
241
// Note that we could use freeze to avoid poison problems, but then we might
243
242
// still need a shuffle to change the vector size.
243
+ auto *Ty = cast<FixedVectorType>(I.getType ());
244
244
unsigned OutputNumElts = Ty->getNumElements ();
245
245
SmallVector<int , 16 > Mask (OutputNumElts, UndefMaskElem);
246
246
assert (OffsetEltIndex < MinVecNumElts && " Address offset too big" );
@@ -271,9 +271,8 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
271
271
/// This removes a shuffle in IR and may allow combining of other loaded values.
272
272
bool VectorCombine::widenSubvectorLoad (Instruction &I) {
273
273
// Match subvector insert of fixed vector.
274
- auto *Ty = dyn_cast<FixedVectorType>(I.getType ());
275
274
auto *Shuf = dyn_cast<ShuffleVectorInst>(&I);
276
- if (!Ty || ! Shuf || !Shuf->isIdentityWithPadding ())
275
+ if (!Shuf || !Shuf->isIdentityWithPadding ())
277
276
return false ;
278
277
279
278
// Allow a non-canonical shuffle mask that is choosing elements from op1.
@@ -290,6 +289,7 @@ bool VectorCombine::widenSubvectorLoad(Instruction &I) {
290
289
// We use minimal alignment (maximum flexibility) because we only care about
291
290
// the dereferenceable region. When calculating cost and creating a new op,
292
291
// we may use a larger value based on alignment attributes.
292
+ auto *Ty = cast<FixedVectorType>(I.getType ());
293
293
const DataLayout &DL = I.getModule ()->getDataLayout ();
294
294
Value *SrcPtr = Load->getPointerOperand ()->stripPointerCasts ();
295
295
assert (isa<PointerType>(SrcPtr->getType ()) && " Expected a pointer type" );
@@ -608,10 +608,6 @@ bool VectorCombine::foldExtractExtract(Instruction &I) {
608
608
/// Try to replace an extract + scalar fneg + insert with a vector fneg +
609
609
/// shuffle.
610
610
bool VectorCombine::foldInsExtFNeg (Instruction &I) {
611
- auto *VecTy = dyn_cast<FixedVectorType>(I.getType ());
612
- if (!VecTy)
613
- return false ;
614
-
615
611
// Match an insert (op (extract)) pattern.
616
612
Value *DestVec;
617
613
uint64_t Index;
@@ -629,6 +625,7 @@ bool VectorCombine::foldInsExtFNeg(Instruction &I) {
629
625
return false ;
630
626
631
627
// TODO: We could handle this with a length-changing shuffle.
628
+ auto *VecTy = cast<FixedVectorType>(I.getType ());
632
629
if (SrcVec->getType () != VecTy)
633
630
return false ;
634
631
@@ -685,11 +682,11 @@ bool VectorCombine::foldBitcastShuf(Instruction &I) {
685
682
// mask for scalable type is a splat or not.
686
683
// 2) Disallow non-vector casts and length-changing shuffles.
687
684
// TODO: We could allow any shuffle.
688
- auto *DestTy = dyn_cast<FixedVectorType>(I.getType ());
689
685
auto *SrcTy = dyn_cast<FixedVectorType>(V->getType ());
690
- if (!SrcTy || !DestTy || I.getOperand (0 )->getType () != SrcTy)
686
+ if (!SrcTy || I.getOperand (0 )->getType () != SrcTy)
691
687
return false ;
692
688
689
+ auto *DestTy = cast<FixedVectorType>(I.getType ());
693
690
unsigned DestNumElts = DestTy->getNumElements ();
694
691
unsigned SrcNumElts = SrcTy->getNumElements ();
695
692
SmallVector<int , 16 > NewMask;
@@ -1121,17 +1118,14 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
1121
1118
if (!match (&I, m_Load (m_Value (Ptr))))
1122
1119
return false ;
1123
1120
1121
+ auto *FixedVT = cast<FixedVectorType>(I.getType ());
1124
1122
auto *LI = cast<LoadInst>(&I);
1125
1123
const DataLayout &DL = I.getModule ()->getDataLayout ();
1126
- if (LI->isVolatile () || !DL.typeSizeEqualsStoreSize (LI->getType ()))
1127
- return false ;
1128
-
1129
- auto *FixedVT = dyn_cast<FixedVectorType>(LI->getType ());
1130
- if (!FixedVT)
1124
+ if (LI->isVolatile () || !DL.typeSizeEqualsStoreSize (FixedVT))
1131
1125
return false ;
1132
1126
1133
1127
InstructionCost OriginalCost =
1134
- TTI.getMemoryOpCost (Instruction::Load, LI-> getType () , LI->getAlign (),
1128
+ TTI.getMemoryOpCost (Instruction::Load, FixedVT , LI->getAlign (),
1135
1129
LI->getPointerAddressSpace ());
1136
1130
InstructionCost ScalarizedCost = 0 ;
1137
1131
@@ -1171,7 +1165,7 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
1171
1165
1172
1166
auto *Index = dyn_cast<ConstantInt>(UI->getOperand (1 ));
1173
1167
OriginalCost +=
1174
- TTI.getVectorInstrCost (Instruction::ExtractElement, LI-> getType () ,
1168
+ TTI.getVectorInstrCost (Instruction::ExtractElement, FixedVT ,
1175
1169
Index ? Index->getZExtValue () : -1 );
1176
1170
ScalarizedCost +=
1177
1171
TTI.getMemoryOpCost (Instruction::Load, FixedVT->getElementType (),
@@ -1206,10 +1200,7 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
1206
1200
/// Try to convert "shuffle (binop), (binop)" with a shared binop operand into
1207
1201
/// "binop (shuffle), (shuffle)".
1208
1202
bool VectorCombine::foldShuffleOfBinops (Instruction &I) {
1209
- auto *VecTy = dyn_cast<FixedVectorType>(I.getType ());
1210
- if (!VecTy)
1211
- return false ;
1212
-
1203
+ auto *VecTy = cast<FixedVectorType>(I.getType ());
1213
1204
BinaryOperator *B0, *B1;
1214
1205
ArrayRef<int > Mask;
1215
1206
if (!match (&I, m_Shuffle (m_OneUse (m_BinOp (B0)), m_OneUse (m_BinOp (B1)),
@@ -1381,14 +1372,16 @@ bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
1381
1372
/// number of operations if the target reports them as cheaper.
1382
1373
bool VectorCombine::foldSelectShuffle (Instruction &I, bool FromReduction) {
1383
1374
auto *SVI = dyn_cast<ShuffleVectorInst>(&I);
1384
- auto *VT = dyn_cast<FixedVectorType>(I.getType ());
1385
- if (!SVI || !VT)
1375
+ if (!SVI)
1386
1376
return false ;
1377
+
1378
+ auto *VT = cast<FixedVectorType>(I.getType ());
1387
1379
auto *Op0 = dyn_cast<Instruction>(SVI->getOperand (0 ));
1388
1380
auto *Op1 = dyn_cast<Instruction>(SVI->getOperand (1 ));
1389
1381
if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp () || !Op1->isBinaryOp () ||
1390
1382
VT != Op0->getType ())
1391
1383
return false ;
1384
+
1392
1385
auto *SVI0A = dyn_cast<Instruction>(Op0->getOperand (0 ));
1393
1386
auto *SVI0B = dyn_cast<Instruction>(Op0->getOperand (1 ));
1394
1387
auto *SVI1A = dyn_cast<Instruction>(Op1->getOperand (0 ));
@@ -1706,18 +1699,23 @@ bool VectorCombine::run() {
1706
1699
auto FoldInst = [this , &MadeChange](Instruction &I) {
1707
1700
Builder.SetInsertPoint (&I);
1708
1701
if (!ScalarizationOnly) {
1709
- MadeChange |= vectorizeLoadInsert (I);
1710
- MadeChange |= widenSubvectorLoad (I);
1711
- MadeChange |= foldExtractExtract (I);
1712
- MadeChange |= foldInsExtFNeg (I);
1713
- MadeChange |= foldBitcastShuf (I);
1714
- MadeChange |= foldExtractedCmps (I);
1715
- MadeChange |= foldShuffleOfBinops (I);
1716
- MadeChange |= foldShuffleFromReductions (I);
1717
- MadeChange |= foldSelectShuffle (I);
1702
+ if (isa<FixedVectorType>(I.getType ())) {
1703
+ MadeChange |= vectorizeLoadInsert (I);
1704
+ MadeChange |= widenSubvectorLoad (I);
1705
+ MadeChange |= foldInsExtFNeg (I);
1706
+ MadeChange |= foldBitcastShuf (I);
1707
+ MadeChange |= foldShuffleOfBinops (I);
1708
+ MadeChange |= foldSelectShuffle (I);
1709
+ } else {
1710
+ MadeChange |= foldExtractExtract (I);
1711
+ MadeChange |= foldExtractedCmps (I);
1712
+ MadeChange |= foldShuffleFromReductions (I);
1713
+ }
1714
+ }
1715
+ if (isa<FixedVectorType>(I.getType ())) {
1716
+ MadeChange |= scalarizeBinopOrCmp (I);
1717
+ MadeChange |= scalarizeLoadExtract (I);
1718
1718
}
1719
- MadeChange |= scalarizeBinopOrCmp (I);
1720
- MadeChange |= scalarizeLoadExtract (I);
1721
1719
MadeChange |= foldSingleElementStore (I);
1722
1720
};
1723
1721
for (BasicBlock &BB : F) {
0 commit comments