Skip to content

Commit e239198

Browse files
committed
[InstCombine] fold select shuffles with shared operand together
We don't combine generic shuffles together in IR, but select shuffles are a special-case because a select shuffle of a select shuffle is just another select shuffle; codegen is expected to efficiently lower those (select shuffles are also the canonical form of a vector select with constant condition).
1 parent d743aff commit e239198

File tree

2 files changed

+65
-11
lines changed

2 files changed

+65
-11
lines changed

llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1961,6 +1961,53 @@ static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) {
19611961
return {};
19621962
}
19631963

1964+
/// A select shuffle of a select shuffle with a shared operand can be reduced
1965+
/// to a single select shuffle. This is an obvious improvement in IR, and the
1966+
/// backend is expected to lower select shuffles efficiently.
1967+
static Instruction *foldSelectShuffleOfSelectShuffle(ShuffleVectorInst &Shuf) {
1968+
assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
1969+
1970+
Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
1971+
SmallVector<int, 16> Mask;
1972+
Shuf.getShuffleMask(Mask);
1973+
unsigned NumElts = Mask.size();
1974+
1975+
// Canonicalize a select shuffle with common operand as Op1.
1976+
auto *ShufOp = dyn_cast<ShuffleVectorInst>(Op0);
1977+
if (ShufOp && ShufOp->isSelect() &&
1978+
(ShufOp->getOperand(0) == Op1 || ShufOp->getOperand(1) == Op1)) {
1979+
std::swap(Op0, Op1);
1980+
ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
1981+
}
1982+
1983+
ShufOp = dyn_cast<ShuffleVectorInst>(Op1);
1984+
if (!ShufOp || !ShufOp->isSelect() ||
1985+
(ShufOp->getOperand(0) != Op0 && ShufOp->getOperand(1) != Op0))
1986+
return nullptr;
1987+
1988+
Value *X = ShufOp->getOperand(0), *Y = ShufOp->getOperand(1);
1989+
SmallVector<int, 16> Mask1;
1990+
ShufOp->getShuffleMask(Mask1);
1991+
assert(Mask1.size() == NumElts && "Vector size changed with select shuffle");
1992+
1993+
// Canonicalize common operand (Op0) as X (first operand of first shuffle).
1994+
if (Y == Op0) {
1995+
std::swap(X, Y);
1996+
ShuffleVectorInst::commuteShuffleMask(Mask1, NumElts);
1997+
}
1998+
1999+
// If the mask chooses from X (operand 0), it stays the same.
2000+
// If the mask chooses from the earlier shuffle, the other mask value is
2001+
// transferred to the combined select shuffle:
2002+
// shuf X, (shuf X, Y, M1), M --> shuf X, Y, M'
2003+
SmallVector<int, 16> NewMask(NumElts);
2004+
for (unsigned i = 0; i != NumElts; ++i)
2005+
NewMask[i] = Mask[i] < (signed)NumElts ? Mask[i] : Mask1[i];
2006+
2007+
assert(ShuffleVectorInst::isSelectMask(NewMask) && "Unexpected shuffle mask");
2008+
return new ShuffleVectorInst(X, Y, NewMask);
2009+
}
2010+
19642011
static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) {
19652012
assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
19662013

@@ -2065,6 +2112,9 @@ Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) {
20652112
return &Shuf;
20662113
}
20672114

2115+
if (Instruction *I = foldSelectShuffleOfSelectShuffle(Shuf))
2116+
return I;
2117+
20682118
if (Instruction *I = foldSelectShuffleWith1Binop(Shuf))
20692119
return I;
20702120

llvm/test/Transforms/InstCombine/shuffle_select.ll

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1528,10 +1528,12 @@ define <4 x i32> @PR41419(<4 x i32> %v) {
15281528
ret <4 x i32> %s
15291529
}
15301530

1531+
; The shuffle masks in the next 4 tests are identical to make it easier
1532+
; to see that we are choosing the correct elements in the new shuffle.
1533+
15311534
define <5 x i4> @sel_common_op_commute0(<5 x i4> %x, <5 x i4> %y) {
15321535
; CHECK-LABEL: @sel_common_op_commute0(
1533-
; CHECK-NEXT: [[S1:%.*]] = shufflevector <5 x i4> [[X:%.*]], <5 x i4> [[Y:%.*]], <5 x i32> <i32 undef, i32 6, i32 2, i32 undef, i32 undef>
1534-
; CHECK-NEXT: [[S2:%.*]] = shufflevector <5 x i4> [[X]], <5 x i4> [[S1]], <5 x i32> <i32 0, i32 6, i32 7, i32 3, i32 4>
1536+
; CHECK-NEXT: [[S2:%.*]] = shufflevector <5 x i4> [[X:%.*]], <5 x i4> [[Y:%.*]], <5 x i32> <i32 0, i32 6, i32 2, i32 3, i32 4>
15351537
; CHECK-NEXT: ret <5 x i4> [[S2]]
15361538
;
15371539
%s1 = shufflevector <5 x i4> %x, <5 x i4> %y, <5 x i32> <i32 0, i32 6, i32 2, i32 3, i32 9>
@@ -1541,8 +1543,7 @@ define <5 x i4> @sel_common_op_commute0(<5 x i4> %x, <5 x i4> %y) {
15411543

15421544
define <5 x i4> @sel_common_op_commute1(<5 x i4> %x, <5 x i4> %y) {
15431545
; CHECK-LABEL: @sel_common_op_commute1(
1544-
; CHECK-NEXT: [[S1:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[X:%.*]], <5 x i32> <i32 undef, i32 6, i32 2, i32 undef, i32 undef>
1545-
; CHECK-NEXT: [[S2:%.*]] = shufflevector <5 x i4> [[X]], <5 x i4> [[S1]], <5 x i32> <i32 0, i32 6, i32 7, i32 3, i32 4>
1546+
; CHECK-NEXT: [[S2:%.*]] = shufflevector <5 x i4> [[X:%.*]], <5 x i4> [[Y:%.*]], <5 x i32> <i32 0, i32 1, i32 7, i32 3, i32 4>
15461547
; CHECK-NEXT: ret <5 x i4> [[S2]]
15471548
;
15481549
%s1 = shufflevector <5 x i4> %y, <5 x i4> %x, <5 x i32> <i32 0, i32 6, i32 2, i32 3, i32 9>
@@ -1552,8 +1553,7 @@ define <5 x i4> @sel_common_op_commute1(<5 x i4> %x, <5 x i4> %y) {
15521553

15531554
define <5 x i4> @sel_common_op_commute2(<5 x i4> %x, <5 x i4> %y) {
15541555
; CHECK-LABEL: @sel_common_op_commute2(
1555-
; CHECK-NEXT: [[S1:%.*]] = shufflevector <5 x i4> [[X:%.*]], <5 x i4> [[Y:%.*]], <5 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 9>
1556-
; CHECK-NEXT: [[S2:%.*]] = shufflevector <5 x i4> [[S1]], <5 x i4> [[X]], <5 x i32> <i32 0, i32 6, i32 7, i32 3, i32 4>
1556+
; CHECK-NEXT: [[S2:%.*]] = shufflevector <5 x i4> [[X:%.*]], <5 x i4> [[Y:%.*]], <5 x i32> <i32 0, i32 1, i32 2, i32 3, i32 9>
15571557
; CHECK-NEXT: ret <5 x i4> [[S2]]
15581558
;
15591559
%s1 = shufflevector <5 x i4> %x, <5 x i4> %y, <5 x i32> <i32 0, i32 6, i32 2, i32 3, i32 9>
@@ -1563,8 +1563,7 @@ define <5 x i4> @sel_common_op_commute2(<5 x i4> %x, <5 x i4> %y) {
15631563

15641564
define <5 x i4> @sel_common_op_commute3(<5 x i4> %x, <5 x i4> %y) {
15651565
; CHECK-LABEL: @sel_common_op_commute3(
1566-
; CHECK-NEXT: [[S1:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[X:%.*]], <5 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 9>
1567-
; CHECK-NEXT: [[S2:%.*]] = shufflevector <5 x i4> [[S1]], <5 x i4> [[X]], <5 x i32> <i32 0, i32 6, i32 7, i32 3, i32 4>
1566+
; CHECK-NEXT: [[S2:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[X:%.*]], <5 x i32> <i32 0, i32 6, i32 7, i32 3, i32 9>
15681567
; CHECK-NEXT: ret <5 x i4> [[S2]]
15691568
;
15701569
%s1 = shufflevector <5 x i4> %y, <5 x i4> %x, <5 x i32> <i32 0, i32 6, i32 2, i32 3, i32 9>
@@ -1574,15 +1573,16 @@ define <5 x i4> @sel_common_op_commute3(<5 x i4> %x, <5 x i4> %y) {
15741573

15751574
define <5 x i4> @sel_common_op_commute3_poison_mask_elts(<5 x i4> %x, <5 x i4> %y) {
15761575
; CHECK-LABEL: @sel_common_op_commute3_poison_mask_elts(
1577-
; CHECK-NEXT: [[S1:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[X:%.*]], <5 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 9>
1578-
; CHECK-NEXT: [[S2:%.*]] = shufflevector <5 x i4> [[S1]], <5 x i4> [[X]], <5 x i32> <i32 0, i32 6, i32 undef, i32 undef, i32 4>
1576+
; CHECK-NEXT: [[S2:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[X:%.*]], <5 x i32> <i32 0, i32 6, i32 undef, i32 undef, i32 9>
15791577
; CHECK-NEXT: ret <5 x i4> [[S2]]
15801578
;
15811579
%s1 = shufflevector <5 x i4> %y, <5 x i4> %x, <5 x i32> <i32 0, i32 6, i32 2, i32 poison, i32 9>
15821580
%s2 = shufflevector <5 x i4> %s1, <5 x i4> %x, <5 x i32> <i32 0, i32 6, i32 poison, i32 3, i32 4>
15831581
ret <5 x i4> %s2
15841582
}
15851583

1584+
; negative test - need shared operand
1585+
15861586
define <5 x i4> @sel_not_common_op_commute3(<5 x i4> %x, <5 x i4> %y, <5 x i4> %z) {
15871587
; CHECK-LABEL: @sel_not_common_op_commute3(
15881588
; CHECK-NEXT: [[S1:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[Z:%.*]], <5 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 9>
@@ -1594,6 +1594,8 @@ define <5 x i4> @sel_not_common_op_commute3(<5 x i4> %x, <5 x i4> %y, <5 x i4> %
15941594
ret <5 x i4> %s2
15951595
}
15961596

1597+
; negative test - need "select" shuffle, no lane changes
1598+
15971599
define <5 x i4> @not_sel_common_op(<5 x i4> %x, <5 x i4> %y) {
15981600
; CHECK-LABEL: @not_sel_common_op(
15991601
; CHECK-NEXT: [[S1:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[X:%.*]], <5 x i32> <i32 undef, i32 6, i32 undef, i32 3, i32 9>
@@ -1605,11 +1607,13 @@ define <5 x i4> @not_sel_common_op(<5 x i4> %x, <5 x i4> %y) {
16051607
ret <5 x i4> %s2
16061608
}
16071609

1610+
; extra use is ok
1611+
16081612
define <4 x i32> @sel_common_op_extra_use(<4 x i32> %x, <4 x i32> %y) {
16091613
; CHECK-LABEL: @sel_common_op_extra_use(
16101614
; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> [[X:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
16111615
; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[S1]])
1612-
; CHECK-NEXT: [[S2:%.*]] = shufflevector <4 x i32> [[S1]], <4 x i32> [[X]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1616+
; CHECK-NEXT: [[S2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
16131617
; CHECK-NEXT: ret <4 x i32> [[S2]]
16141618
;
16151619
%s1 = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> <i32 0, i32 5, i32 2, i32 7>

0 commit comments

Comments
 (0)