Skip to content

Commit 4446f71

Browse files
committed
[InstCombine] try to fold a pair of insertelements into one insertelement
This replaces patches that tried to convert related patterns to shuffles (D138872, D138873, D138874 - reverted/abandoned) but caused codegen problems and were questionable as a canonicalization because an insertelement is a simpler op than a shuffle. This detects a larger pattern -- insert-of-insert -- and replaces with another insert, so this hopefully does not cause any problems. As noted by TODO items in the code and tests, this could go a lot further. But this is enough to reduce the motivating test from issue llvm#17113. Example proofs: https://alive2.llvm.org/ce/z/NnUv3a I drafted a version of this for AggressiveInstCombine, but it seems that would uncover yet another phase ordering gap. If we do generalize this to handle the full range of potential patterns, that may be worth looking at again. Differential Revision: https://reviews.llvm.org/D139668
1 parent 620d2bf commit 4446f71

File tree

3 files changed

+312
-82
lines changed

3 files changed

+312
-82
lines changed

llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1515,6 +1515,57 @@ static Instruction *narrowInsElt(InsertElementInst &InsElt,
15151515
return CastInst::Create(CastOpcode, NewInsElt, InsElt.getType());
15161516
}
15171517

1518+
/// If we are inserting 2 halves of a value into adjacent elements of a vector,
1519+
/// try to convert to a single insert with appropriate bitcasts.
1520+
static Instruction *foldTruncInsEltPair(InsertElementInst &InsElt,
1521+
bool IsBigEndian,
1522+
InstCombiner::BuilderTy &Builder) {
1523+
Value *VecOp = InsElt.getOperand(0);
1524+
Value *ScalarOp = InsElt.getOperand(1);
1525+
Value *IndexOp = InsElt.getOperand(2);
1526+
1527+
// inselt (inselt BaseVec, (trunc X), Index0), (trunc (lshr X, BW/2)), Index1
1528+
// TODO: The insertion order could be reversed.
1529+
// TODO: Detect smaller fractions of the scalar.
1530+
// TODO: One-use checks are conservative.
1531+
auto *VTy = dyn_cast<FixedVectorType>(InsElt.getType());
1532+
Value *X, *BaseVec;
1533+
uint64_t ShAmt, Index0, Index1;
1534+
if (!VTy || (VTy->getNumElements() & 1) ||
1535+
!match(VecOp, m_OneUse(m_InsertElt(m_Value(BaseVec), m_Trunc(m_Value(X)),
1536+
m_ConstantInt(Index0)))) ||
1537+
!match(ScalarOp, m_OneUse(m_Trunc(m_LShr(m_Specific(X),
1538+
m_ConstantInt(ShAmt))))) ||
1539+
!match(IndexOp, m_ConstantInt(Index1)))
1540+
return nullptr;
1541+
1542+
Type *SrcTy = X->getType();
1543+
unsigned ScalarWidth = SrcTy->getScalarSizeInBits();
1544+
unsigned VecEltWidth = VTy->getScalarSizeInBits();
1545+
if (ScalarWidth != VecEltWidth * 2 || ShAmt != VecEltWidth)
1546+
return nullptr;
1547+
1548+
// The low half must be inserted at element +1 for big-endian.
1549+
// The high half must be inserted at element +1 for little-endian
1550+
if (IsBigEndian ? Index0 != Index1 + 1 : Index0 + 1 != Index1)
1551+
return nullptr;
1552+
1553+
// The high half must be inserted at an even element for big-endian.
1554+
// The low half must be inserted at an even element for little-endian.
1555+
if (IsBigEndian ? Index1 & 1 : Index0 & 1)
1556+
return nullptr;
1557+
1558+
// Bitcast the base vector to a vector type with the source element type.
1559+
Type *CastTy = FixedVectorType::get(SrcTy, VTy->getNumElements() / 2);
1560+
Value *CastBaseVec = Builder.CreateBitCast(BaseVec, CastTy);
1561+
1562+
// Scale the insert index for a vector with half as many elements.
1563+
// bitcast (inselt (bitcast BaseVec), X, NewIndex)
1564+
uint64_t NewIndex = IsBigEndian ? Index1 / 2 : Index0 / 2;
1565+
Value *NewInsert = Builder.CreateInsertElement(CastBaseVec, X, NewIndex);
1566+
return new BitCastInst(NewInsert, VTy);
1567+
}
1568+
15181569
Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
15191570
Value *VecOp = IE.getOperand(0);
15201571
Value *ScalarOp = IE.getOperand(1);
@@ -1642,6 +1693,9 @@ Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
16421693
if (Instruction *Ext = narrowInsElt(IE, Builder))
16431694
return Ext;
16441695

1696+
if (Instruction *Ext = foldTruncInsEltPair(IE, DL.isBigEndian(), Builder))
1697+
return Ext;
1698+
16451699
return nullptr;
16461700
}
16471701

0 commit comments

Comments
 (0)