Skip to content

Commit 87debda

Browse files
committed
[VectorCombine] check instruction type before dispatching to folds
No externally visible change is intended, but this appears to be a noticeable (surprising) improvement in compile-time based on: https://llvm-compile-time-tracker.com/compare.php?from=0f3e72e86c8c7c6bf0ec24bf1e2acd74b4123e7b&to=5e8c2026d10e8e2c93c038c776853bed0e7c8fc1&stat=instructions:u The early returns in the individual fold functions are not good enough to avoid the overhead of the many "fold*" calls, so this speeds up the main instruction loop enough to make a difference.
1 parent ffe05b8 commit 87debda

File tree

1 file changed

+32
-34
lines changed

1 file changed

+32
-34
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 32 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -152,9 +152,8 @@ static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI) {
152152
bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
153153
// Match insert into fixed vector of scalar value.
154154
// TODO: Handle non-zero insert index.
155-
auto *Ty = dyn_cast<FixedVectorType>(I.getType());
156155
Value *Scalar;
157-
if (!Ty || !match(&I, m_InsertElt(m_Undef(), m_Value(Scalar), m_ZeroInt())) ||
156+
if (!match(&I, m_InsertElt(m_Undef(), m_Value(Scalar), m_ZeroInt())) ||
158157
!Scalar->hasOneUse())
159158
return false;
160159

@@ -241,6 +240,7 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
241240
// We assume this operation has no cost in codegen if there was no offset.
242241
// Note that we could use freeze to avoid poison problems, but then we might
243242
// still need a shuffle to change the vector size.
243+
auto *Ty = cast<FixedVectorType>(I.getType());
244244
unsigned OutputNumElts = Ty->getNumElements();
245245
SmallVector<int, 16> Mask(OutputNumElts, UndefMaskElem);
246246
assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
@@ -271,9 +271,8 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
271271
/// This removes a shuffle in IR and may allow combining of other loaded values.
272272
bool VectorCombine::widenSubvectorLoad(Instruction &I) {
273273
// Match subvector insert of fixed vector.
274-
auto *Ty = dyn_cast<FixedVectorType>(I.getType());
275274
auto *Shuf = dyn_cast<ShuffleVectorInst>(&I);
276-
if (!Ty || !Shuf || !Shuf->isIdentityWithPadding())
275+
if (!Shuf || !Shuf->isIdentityWithPadding())
277276
return false;
278277

279278
// Allow a non-canonical shuffle mask that is choosing elements from op1.
@@ -290,6 +289,7 @@ bool VectorCombine::widenSubvectorLoad(Instruction &I) {
290289
// We use minimal alignment (maximum flexibility) because we only care about
291290
// the dereferenceable region. When calculating cost and creating a new op,
292291
// we may use a larger value based on alignment attributes.
292+
auto *Ty = cast<FixedVectorType>(I.getType());
293293
const DataLayout &DL = I.getModule()->getDataLayout();
294294
Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
295295
assert(isa<PointerType>(SrcPtr->getType()) && "Expected a pointer type");
@@ -608,10 +608,6 @@ bool VectorCombine::foldExtractExtract(Instruction &I) {
608608
/// Try to replace an extract + scalar fneg + insert with a vector fneg +
609609
/// shuffle.
610610
bool VectorCombine::foldInsExtFNeg(Instruction &I) {
611-
auto *VecTy = dyn_cast<FixedVectorType>(I.getType());
612-
if (!VecTy)
613-
return false;
614-
615611
// Match an insert (op (extract)) pattern.
616612
Value *DestVec;
617613
uint64_t Index;
@@ -629,6 +625,7 @@ bool VectorCombine::foldInsExtFNeg(Instruction &I) {
629625
return false;
630626

631627
// TODO: We could handle this with a length-changing shuffle.
628+
auto *VecTy = cast<FixedVectorType>(I.getType());
632629
if (SrcVec->getType() != VecTy)
633630
return false;
634631

@@ -685,11 +682,11 @@ bool VectorCombine::foldBitcastShuf(Instruction &I) {
685682
// mask for scalable type is a splat or not.
686683
// 2) Disallow non-vector casts and length-changing shuffles.
687684
// TODO: We could allow any shuffle.
688-
auto *DestTy = dyn_cast<FixedVectorType>(I.getType());
689685
auto *SrcTy = dyn_cast<FixedVectorType>(V->getType());
690-
if (!SrcTy || !DestTy || I.getOperand(0)->getType() != SrcTy)
686+
if (!SrcTy || I.getOperand(0)->getType() != SrcTy)
691687
return false;
692688

689+
auto *DestTy = cast<FixedVectorType>(I.getType());
693690
unsigned DestNumElts = DestTy->getNumElements();
694691
unsigned SrcNumElts = SrcTy->getNumElements();
695692
SmallVector<int, 16> NewMask;
@@ -1121,17 +1118,14 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
11211118
if (!match(&I, m_Load(m_Value(Ptr))))
11221119
return false;
11231120

1121+
auto *FixedVT = cast<FixedVectorType>(I.getType());
11241122
auto *LI = cast<LoadInst>(&I);
11251123
const DataLayout &DL = I.getModule()->getDataLayout();
1126-
if (LI->isVolatile() || !DL.typeSizeEqualsStoreSize(LI->getType()))
1127-
return false;
1128-
1129-
auto *FixedVT = dyn_cast<FixedVectorType>(LI->getType());
1130-
if (!FixedVT)
1124+
if (LI->isVolatile() || !DL.typeSizeEqualsStoreSize(FixedVT))
11311125
return false;
11321126

11331127
InstructionCost OriginalCost =
1134-
TTI.getMemoryOpCost(Instruction::Load, LI->getType(), LI->getAlign(),
1128+
TTI.getMemoryOpCost(Instruction::Load, FixedVT, LI->getAlign(),
11351129
LI->getPointerAddressSpace());
11361130
InstructionCost ScalarizedCost = 0;
11371131

@@ -1171,7 +1165,7 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
11711165

11721166
auto *Index = dyn_cast<ConstantInt>(UI->getOperand(1));
11731167
OriginalCost +=
1174-
TTI.getVectorInstrCost(Instruction::ExtractElement, LI->getType(),
1168+
TTI.getVectorInstrCost(Instruction::ExtractElement, FixedVT,
11751169
Index ? Index->getZExtValue() : -1);
11761170
ScalarizedCost +=
11771171
TTI.getMemoryOpCost(Instruction::Load, FixedVT->getElementType(),
@@ -1206,10 +1200,7 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
12061200
/// Try to convert "shuffle (binop), (binop)" with a shared binop operand into
12071201
/// "binop (shuffle), (shuffle)".
12081202
bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
1209-
auto *VecTy = dyn_cast<FixedVectorType>(I.getType());
1210-
if (!VecTy)
1211-
return false;
1212-
1203+
auto *VecTy = cast<FixedVectorType>(I.getType());
12131204
BinaryOperator *B0, *B1;
12141205
ArrayRef<int> Mask;
12151206
if (!match(&I, m_Shuffle(m_OneUse(m_BinOp(B0)), m_OneUse(m_BinOp(B1)),
@@ -1381,14 +1372,16 @@ bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
13811372
/// number of operations if the target reports them as cheaper.
13821373
bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
13831374
auto *SVI = dyn_cast<ShuffleVectorInst>(&I);
1384-
auto *VT = dyn_cast<FixedVectorType>(I.getType());
1385-
if (!SVI || !VT)
1375+
if (!SVI)
13861376
return false;
1377+
1378+
auto *VT = cast<FixedVectorType>(I.getType());
13871379
auto *Op0 = dyn_cast<Instruction>(SVI->getOperand(0));
13881380
auto *Op1 = dyn_cast<Instruction>(SVI->getOperand(1));
13891381
if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
13901382
VT != Op0->getType())
13911383
return false;
1384+
13921385
auto *SVI0A = dyn_cast<Instruction>(Op0->getOperand(0));
13931386
auto *SVI0B = dyn_cast<Instruction>(Op0->getOperand(1));
13941387
auto *SVI1A = dyn_cast<Instruction>(Op1->getOperand(0));
@@ -1706,18 +1699,23 @@ bool VectorCombine::run() {
17061699
auto FoldInst = [this, &MadeChange](Instruction &I) {
17071700
Builder.SetInsertPoint(&I);
17081701
if (!ScalarizationOnly) {
1709-
MadeChange |= vectorizeLoadInsert(I);
1710-
MadeChange |= widenSubvectorLoad(I);
1711-
MadeChange |= foldExtractExtract(I);
1712-
MadeChange |= foldInsExtFNeg(I);
1713-
MadeChange |= foldBitcastShuf(I);
1714-
MadeChange |= foldExtractedCmps(I);
1715-
MadeChange |= foldShuffleOfBinops(I);
1716-
MadeChange |= foldShuffleFromReductions(I);
1717-
MadeChange |= foldSelectShuffle(I);
1702+
if (isa<FixedVectorType>(I.getType())) {
1703+
MadeChange |= vectorizeLoadInsert(I);
1704+
MadeChange |= widenSubvectorLoad(I);
1705+
MadeChange |= foldInsExtFNeg(I);
1706+
MadeChange |= foldBitcastShuf(I);
1707+
MadeChange |= foldShuffleOfBinops(I);
1708+
MadeChange |= foldSelectShuffle(I);
1709+
} else {
1710+
MadeChange |= foldExtractExtract(I);
1711+
MadeChange |= foldExtractedCmps(I);
1712+
MadeChange |= foldShuffleFromReductions(I);
1713+
}
1714+
}
1715+
if (isa<FixedVectorType>(I.getType())) {
1716+
MadeChange |= scalarizeBinopOrCmp(I);
1717+
MadeChange |= scalarizeLoadExtract(I);
17181718
}
1719-
MadeChange |= scalarizeBinopOrCmp(I);
1720-
MadeChange |= scalarizeLoadExtract(I);
17211719
MadeChange |= foldSingleElementStore(I);
17221720
};
17231721
for (BasicBlock &BB : F) {

0 commit comments

Comments
 (0)