Skip to content

Commit 396d18a

Browse files
committed
[InstCombine] replace shuffle's insertelement operand if inserted scalar is not demanded
This pattern is noted as a regression from D70246, where we removed an over-aggressive shuffle simplification. SimplifyDemandedVectorElts fails to catch this case when the insert has multiple uses, so I'm proposing to pattern match the minimal sequence directly. This fold does not conflict with any of our current shuffle undef/poison semantics. Differential Revision: https://reviews.llvm.org/D71220
1 parent a0c558e commit 396d18a

File tree

2 files changed

+33
-3
lines changed

2 files changed

+33
-3
lines changed

llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1741,7 +1741,8 @@ static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
17411741
return new ShuffleVectorInst(X, Y, ConstantVector::get(NewMask));
17421742
}
17431743

1744-
/// Try to replace a shuffle with an insertelement.
1744+
/// Try to replace a shuffle with an insertelement or try to replace a shuffle
1745+
/// operand with the operand of an insertelement.
17451746
static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf) {
17461747
Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
17471748
SmallVector<int, 16> Mask = Shuf.getShuffleMask();
@@ -1753,6 +1754,31 @@ static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf) {
17531754
if (NumElts != (int)(V0->getType()->getVectorNumElements()))
17541755
return nullptr;
17551756

1757+
// This is a specialization of a fold in SimplifyDemandedVectorElts. We may
1758+
// not be able to handle it there if the insertelement has >1 use.
1759+
// If the shuffle has an insertelement operand but does not choose the
1760+
// inserted scalar element from that value, then we can replace that shuffle
1761+
// operand with the source vector of the insertelement.
1762+
Value *X;
1763+
uint64_t IdxC;
1764+
if (match(V0, m_InsertElement(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
1765+
// shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask
1766+
if (none_of(Mask, [IdxC](int MaskElt) { return MaskElt == (int)IdxC; })) {
1767+
Shuf.setOperand(0, X);
1768+
return &Shuf;
1769+
}
1770+
}
1771+
if (match(V1, m_InsertElement(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
1772+
// Offset the index constant by the vector width because we are checking for
1773+
// accesses to the 2nd vector input of the shuffle.
1774+
IdxC += NumElts;
1775+
// shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
1776+
if (none_of(Mask, [IdxC](int MaskElt) { return MaskElt == (int)IdxC; })) {
1777+
Shuf.setOperand(1, X);
1778+
return &Shuf;
1779+
}
1780+
}
1781+
17561782
// shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
17571783
auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
17581784
// We need an insertelement with a constant index.

llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -670,7 +670,7 @@ define <4 x float> @insert_undemanded_element_op0(<4 x float> %x, <4 x float> %y
670670
; CHECK-LABEL: @insert_undemanded_element_op0(
671671
; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x float> [[X:%.*]], float 4.200000e+01, i32 3
672672
; CHECK-NEXT: call void @use(<4 x float> [[INS]])
673-
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x float> [[INS]], <4 x float> [[Y:%.*]], <4 x i32> <i32 0, i32 7, i32 1, i32 4>
673+
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x float> [[X]], <4 x float> [[Y:%.*]], <4 x i32> <i32 0, i32 7, i32 1, i32 4>
674674
; CHECK-NEXT: ret <4 x float> [[S]]
675675
;
676676
%ins = insertelement <4 x float> %x, float 42.0, i32 3
@@ -683,7 +683,7 @@ define <4 x float> @insert_undemanded_element_op1(<4 x float> %x, <4 x float> %y
683683
; CHECK-LABEL: @insert_undemanded_element_op1(
684684
; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x float> [[X:%.*]], float 4.200000e+01, i32 3
685685
; CHECK-NEXT: call void @use(<4 x float> [[INS]])
686-
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[INS]], <4 x i32> <i32 3, i32 2, i32 1, i32 4>
686+
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[X]], <4 x i32> <i32 3, i32 2, i32 1, i32 4>
687687
; CHECK-NEXT: ret <4 x float> [[S]]
688688
;
689689
%ins = insertelement <4 x float> %x, float 42.0, i32 3
@@ -692,6 +692,8 @@ define <4 x float> @insert_undemanded_element_op1(<4 x float> %x, <4 x float> %y
692692
ret <4 x float> %s
693693
}
694694

695+
; Negative test - shuffle chooses the inserted constant.
696+
695697
define <4 x float> @insert_demanded_element_op0(<4 x float> %x, <4 x float> %y) {
696698
; CHECK-LABEL: @insert_demanded_element_op0(
697699
; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x float> [[X:%.*]], float 4.200000e+01, i32 3
@@ -705,6 +707,8 @@ define <4 x float> @insert_demanded_element_op0(<4 x float> %x, <4 x float> %y)
705707
ret <4 x float> %s
706708
}
707709

710+
; Negative test - shuffle chooses the inserted constant.
711+
708712
define <4 x float> @insert_demanded_element_op1(<4 x float> %x, <4 x float> %y) {
709713
; CHECK-LABEL: @insert_demanded_element_op1(
710714
; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x float> [[X:%.*]], float 4.300000e+01, i32 3

0 commit comments

Comments
 (0)