Skip to content

Commit 3d827ff

Browse files
committed
[VectorCombine] Add intrinsics handling to shuffleToIdentity
This is probably the most involved addition: it uses isTriviallyVectorizable together with isVectorIntrinsicWithScalarOpAtArg to handle a range of intrinsics that all operate lane-wise. Operands that the intrinsic requires to be scalar must be identical across all lanes; they are reused unchanged rather than being looked through. Additional tests have been added for some of the intrinsics covered by isVectorIntrinsicWithScalarOpAtArg / isVectorIntrinsicWithOverloadTypeAtArg.
1 parent f757742 commit 3d827ff

File tree

2 files changed

+42
-53
lines changed

2 files changed

+42
-53
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1729,7 +1729,9 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
17291729
return false;
17301730

17311731
// Look for an identity value.
1732-
if (Item[0].second == 0 && Item[0].first->getType() == Ty &&
1732+
if (Item[0].second == 0 &&
1733+
cast<FixedVectorType>(Item[0].first->getType())->getNumElements() ==
1734+
Ty->getNumElements() &&
17331735
all_of(drop_begin(enumerate(Item)), [&](const auto &E) {
17341736
return !E.value().first || (E.value().first == Item[0].first &&
17351737
E.value().second == (int)E.index());
@@ -1770,6 +1772,20 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
17701772
Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 1));
17711773
} else if (isa<UnaryOperator>(Item[0].first)) {
17721774
Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 0));
1775+
} else if (auto *II = dyn_cast<IntrinsicInst>(Item[0].first);
1776+
II && isTriviallyVectorizable(II->getIntrinsicID())) {
1777+
for (unsigned O = 0; O < II->getNumOperands() - 1; O++) {
1778+
if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(), O)) {
1779+
if (!all_of(drop_begin(Item), [&](InstLane &IL) {
1780+
return !IL.first ||
1781+
(cast<Instruction>(IL.first)->getOperand(O) ==
1782+
cast<Instruction>(Item[0].first)->getOperand(O));
1783+
}))
1784+
return false;
1785+
continue;
1786+
}
1787+
Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, O));
1788+
}
17731789
} else {
17741790
return false;
17751791
}
@@ -1796,13 +1812,24 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
17961812
}
17971813

17981814
auto *I = cast<Instruction>(Item[0].first);
1799-
SmallVector<Value *> Ops(I->getNumOperands());
1800-
for (unsigned Idx = 0, E = I->getNumOperands(); Idx < E; Idx++)
1815+
auto *II = dyn_cast<IntrinsicInst>(I);
1816+
unsigned NumOps = I->getNumOperands() - (II ? 1 : 0);
1817+
SmallVector<Value *> Ops(NumOps);
1818+
for (unsigned Idx = 0; Idx < NumOps; Idx++) {
1819+
if (II && isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(), Idx)) {
1820+
Ops[Idx] = II->getOperand(Idx);
1821+
continue;
1822+
}
18011823
Ops[Idx] = Generate(GenerateInstLaneVectorFromOperand(Item, Idx));
1824+
}
18021825
Builder.SetInsertPoint(I);
1826+
Type *DstTy = FixedVectorType::get(I->getType()->getScalarType(),
1827+
Ty->getNumElements());
18031828
if (auto BI = dyn_cast<BinaryOperator>(I))
18041829
return Builder.CreateBinOp((Instruction::BinaryOps)BI->getOpcode(),
18051830
Ops[0], Ops[1]);
1831+
if (II)
1832+
return Builder.CreateIntrinsic(DstTy, II->getIntrinsicID(), Ops);
18061833
assert(isa<UnaryInstruction>(I) &&
18071834
"Unexpected instruction type in Generate");
18081835
return Builder.CreateUnOp((Instruction::UnaryOps)I->getOpcode(), Ops[0]);

llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll

Lines changed: 12 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -102,11 +102,7 @@ define <8 x half> @fneg(<8 x half> %a, <8 x half> %b) {
102102

103103
define <8 x i8> @abs(<8 x i8> %a) {
104104
; CHECK-LABEL: @abs(
105-
; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
106-
; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
107-
; CHECK-NEXT: [[ABT:%.*]] = call <4 x i8> @llvm.abs.v4i8(<4 x i8> [[AT]], i1 false)
108-
; CHECK-NEXT: [[ABB:%.*]] = call <4 x i8> @llvm.abs.v4i8(<4 x i8> [[AB]], i1 false)
109-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
105+
; CHECK-NEXT: [[R:%.*]] = call <8 x i8> @llvm.abs.v8i8(<8 x i8> [[A:%.*]], i1 false)
110106
; CHECK-NEXT: ret <8 x i8> [[R]]
111107
;
112108
%ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -119,11 +115,7 @@ define <8 x i8> @abs(<8 x i8> %a) {
119115

120116
define <8 x half> @powi(<8 x half> %a) {
121117
; CHECK-LABEL: @powi(
122-
; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
123-
; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
124-
; CHECK-NEXT: [[ABT:%.*]] = call <4 x half> @llvm.powi.v4f16.i32(<4 x half> [[AT]], i32 10)
125-
; CHECK-NEXT: [[ABB:%.*]] = call <4 x half> @llvm.powi.v4f16.i32(<4 x half> [[AB]], i32 10)
126-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x half> [[ABT]], <4 x half> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
118+
; CHECK-NEXT: [[R:%.*]] = call <8 x half> @llvm.powi.v8f16.i32(<8 x half> [[A:%.*]], i32 10)
127119
; CHECK-NEXT: ret <8 x half> [[R]]
128120
;
129121
%ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -136,11 +128,7 @@ define <8 x half> @powi(<8 x half> %a) {
136128

137129
define <8 x i32> @lrint(<8 x half> %a) {
138130
; CHECK-LABEL: @lrint(
139-
; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
140-
; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
141-
; CHECK-NEXT: [[ABT:%.*]] = call <4 x i32> @llvm.lrint.v4i32.v4f16(<4 x half> [[AT]])
142-
; CHECK-NEXT: [[ABB:%.*]] = call <4 x i32> @llvm.lrint.v4i32.v4f16(<4 x half> [[AB]])
143-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[ABT]], <4 x i32> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
131+
; CHECK-NEXT: [[R:%.*]] = call <8 x i32> @llvm.lrint.v8i32.v8f16(<8 x half> [[A:%.*]])
144132
; CHECK-NEXT: ret <8 x i32> [[R]]
145133
;
146134
%ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -392,15 +380,7 @@ define <8 x i8> @icmpsel(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) {
392380

393381
define <8 x half> @fma(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
394382
; CHECK-LABEL: @fma(
395-
; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
396-
; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
397-
; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
398-
; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x half> [[B]], <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
399-
; CHECK-NEXT: [[CB:%.*]] = shufflevector <8 x half> [[C:%.*]], <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
400-
; CHECK-NEXT: [[CT:%.*]] = shufflevector <8 x half> [[C]], <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
401-
; CHECK-NEXT: [[ABB:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[AB]], <4 x half> [[BB]], <4 x half> [[CB]])
402-
; CHECK-NEXT: [[ABT:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[AT]], <4 x half> [[BT]], <4 x half> [[CT]])
403-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x half> [[ABT]], <4 x half> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
383+
; CHECK-NEXT: [[R:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x half> [[C:%.*]])
404384
; CHECK-NEXT: ret <8 x half> [[R]]
405385
;
406386
%ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -452,19 +432,10 @@ define void @exttrunc(<8 x i32> %a, <8 x i32> %b, ptr %p) {
452432

453433
define <8 x i8> @intrinsics_minmax(<8 x i8> %a, <8 x i8> %b) {
454434
; CHECK-LABEL: @intrinsics_minmax(
455-
; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
456-
; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
457-
; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
458-
; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
459-
; CHECK-NEXT: [[ABT:%.*]] = call <4 x i8> @llvm.smin.v4i8(<4 x i8> [[AT]], <4 x i8> [[BT]])
460-
; CHECK-NEXT: [[ABB:%.*]] = call <4 x i8> @llvm.smin.v4i8(<4 x i8> [[AB]], <4 x i8> [[BB]])
461-
; CHECK-NEXT: [[ABT1:%.*]] = call <4 x i8> @llvm.smax.v4i8(<4 x i8> [[ABT]], <4 x i8> [[BT]])
462-
; CHECK-NEXT: [[ABB1:%.*]] = call <4 x i8> @llvm.smax.v4i8(<4 x i8> [[ABB]], <4 x i8> [[BB]])
463-
; CHECK-NEXT: [[ABT2:%.*]] = call <4 x i8> @llvm.umin.v4i8(<4 x i8> [[ABT1]], <4 x i8> [[BT]])
464-
; CHECK-NEXT: [[ABB2:%.*]] = call <4 x i8> @llvm.umin.v4i8(<4 x i8> [[ABB1]], <4 x i8> [[BB]])
465-
; CHECK-NEXT: [[ABT3:%.*]] = call <4 x i8> @llvm.umax.v4i8(<4 x i8> [[ABT2]], <4 x i8> [[BT]])
466-
; CHECK-NEXT: [[ABB3:%.*]] = call <4 x i8> @llvm.umax.v4i8(<4 x i8> [[ABB2]], <4 x i8> [[BB]])
467-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[ABT3]], <4 x i8> [[ABB3]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
435+
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i8> @llvm.smin.v8i8(<8 x i8> [[A:%.*]], <8 x i8> [[B:%.*]])
436+
; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i8> @llvm.smax.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[B]])
437+
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.umin.v8i8(<8 x i8> [[TMP2]], <8 x i8> [[B]])
438+
; CHECK-NEXT: [[R:%.*]] = call <8 x i8> @llvm.umax.v8i8(<8 x i8> [[TMP3]], <8 x i8> [[B]])
468439
; CHECK-NEXT: ret <8 x i8> [[R]]
469440
;
470441
%ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -485,19 +456,10 @@ define <8 x i8> @intrinsics_minmax(<8 x i8> %a, <8 x i8> %b) {
485456

486457
define <8 x i8> @intrinsics_addsat(<8 x i8> %a, <8 x i8> %b) {
487458
; CHECK-LABEL: @intrinsics_addsat(
488-
; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
489-
; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
490-
; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
491-
; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
492-
; CHECK-NEXT: [[ABT:%.*]] = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> [[AT]], <4 x i8> [[BT]])
493-
; CHECK-NEXT: [[ABB:%.*]] = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> [[AB]], <4 x i8> [[BB]])
494-
; CHECK-NEXT: [[ABT1:%.*]] = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> [[ABT]], <4 x i8> [[BT]])
495-
; CHECK-NEXT: [[ABB1:%.*]] = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> [[ABB]], <4 x i8> [[BB]])
496-
; CHECK-NEXT: [[ABT2:%.*]] = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> [[ABT1]], <4 x i8> [[BT]])
497-
; CHECK-NEXT: [[ABB2:%.*]] = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> [[ABB1]], <4 x i8> [[BB]])
498-
; CHECK-NEXT: [[ABT3:%.*]] = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> [[ABT2]], <4 x i8> [[BT]])
499-
; CHECK-NEXT: [[ABB3:%.*]] = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> [[ABB2]], <4 x i8> [[BB]])
500-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[ABT3]], <4 x i8> [[ABB3]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
459+
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> [[A:%.*]], <8 x i8> [[B:%.*]])
460+
; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[B]])
461+
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> [[TMP2]], <8 x i8> [[B]])
462+
; CHECK-NEXT: [[R:%.*]] = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> [[TMP3]], <8 x i8> [[B]])
501463
; CHECK-NEXT: ret <8 x i8> [[R]]
502464
;
503465
%ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>

0 commit comments

Comments
 (0)