Skip to content

Commit b7ed097

Browse files
authored
[VectorCombine] Add intrinsics handling to shuffleToIdentity (#91000)
This is probably the most involved addition, as it tries to make use of isTriviallyVectorizable with isVectorIntrinsicWithScalarOpAtArg to handle a number of different intrinsics that are all lane-wise. Additional tests have been added for some of the different intrinsics from isVectorIntrinsicWithScalarOpAtArg / isVectorIntrinsicWithOverloadTypeAtArg.
1 parent 17daa20 commit b7ed097

File tree

2 files changed

+42
-53
lines changed

2 files changed

+42
-53
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1729,7 +1729,9 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
17291729
return false;
17301730

17311731
// Look for an identity value.
1732-
if (Item[0].second == 0 && Item[0].first->getType() == Ty &&
1732+
if (Item[0].second == 0 &&
1733+
cast<FixedVectorType>(Item[0].first->getType())->getNumElements() ==
1734+
Ty->getNumElements() &&
17331735
all_of(drop_begin(enumerate(Item)), [&](const auto &E) {
17341736
return !E.value().first || (E.value().first == Item[0].first &&
17351737
E.value().second == (int)E.index());
@@ -1773,6 +1775,20 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
17731775
Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 1));
17741776
} else if (isa<UnaryOperator>(Item[0].first)) {
17751777
Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 0));
1778+
} else if (auto *II = dyn_cast<IntrinsicInst>(Item[0].first);
1779+
II && isTriviallyVectorizable(II->getIntrinsicID())) {
1780+
for (unsigned Op = 0, E = II->getNumOperands() - 1; Op < E; Op++) {
1781+
if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(), Op)) {
1782+
if (!all_of(drop_begin(Item), [&](InstLane &IL) {
1783+
return !IL.first ||
1784+
(cast<Instruction>(IL.first)->getOperand(Op) ==
1785+
cast<Instruction>(Item[0].first)->getOperand(Op));
1786+
}))
1787+
return false;
1788+
continue;
1789+
}
1790+
Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, Op));
1791+
}
17761792
} else {
17771793
return false;
17781794
}
@@ -1799,13 +1815,24 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
17991815
}
18001816

18011817
auto *I = cast<Instruction>(Item[0].first);
1802-
SmallVector<Value *> Ops(I->getNumOperands());
1803-
for (unsigned Idx = 0, E = I->getNumOperands(); Idx < E; Idx++)
1818+
auto *II = dyn_cast<IntrinsicInst>(I);
1819+
unsigned NumOps = I->getNumOperands() - (II ? 1 : 0);
1820+
SmallVector<Value *> Ops(NumOps);
1821+
for (unsigned Idx = 0; Idx < NumOps; Idx++) {
1822+
if (II && isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(), Idx)) {
1823+
Ops[Idx] = II->getOperand(Idx);
1824+
continue;
1825+
}
18041826
Ops[Idx] = Generate(GenerateInstLaneVectorFromOperand(Item, Idx));
1827+
}
18051828
Builder.SetInsertPoint(I);
1829+
Type *DstTy = FixedVectorType::get(I->getType()->getScalarType(),
1830+
Ty->getNumElements());
18061831
if (auto BI = dyn_cast<BinaryOperator>(I))
18071832
return Builder.CreateBinOp((Instruction::BinaryOps)BI->getOpcode(),
18081833
Ops[0], Ops[1]);
1834+
if (II)
1835+
return Builder.CreateIntrinsic(DstTy, II->getIntrinsicID(), Ops);
18091836
assert(isa<UnaryInstruction>(I) &&
18101837
"Unexpected instruction type in Generate");
18111838
return Builder.CreateUnOp((Instruction::UnaryOps)I->getOpcode(), Ops[0]);

llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll

Lines changed: 12 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -102,11 +102,7 @@ define <8 x half> @fneg(<8 x half> %a, <8 x half> %b) {
102102

103103
define <8 x i8> @abs(<8 x i8> %a) {
104104
; CHECK-LABEL: @abs(
105-
; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
106-
; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
107-
; CHECK-NEXT: [[ABT:%.*]] = call <4 x i8> @llvm.abs.v4i8(<4 x i8> [[AT]], i1 false)
108-
; CHECK-NEXT: [[ABB:%.*]] = call <4 x i8> @llvm.abs.v4i8(<4 x i8> [[AB]], i1 false)
109-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
105+
; CHECK-NEXT: [[R:%.*]] = call <8 x i8> @llvm.abs.v8i8(<8 x i8> [[A:%.*]], i1 false)
110106
; CHECK-NEXT: ret <8 x i8> [[R]]
111107
;
112108
%ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -119,11 +115,7 @@ define <8 x i8> @abs(<8 x i8> %a) {
119115

120116
define <8 x half> @powi(<8 x half> %a) {
121117
; CHECK-LABEL: @powi(
122-
; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
123-
; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
124-
; CHECK-NEXT: [[ABT:%.*]] = call <4 x half> @llvm.powi.v4f16.i32(<4 x half> [[AT]], i32 10)
125-
; CHECK-NEXT: [[ABB:%.*]] = call <4 x half> @llvm.powi.v4f16.i32(<4 x half> [[AB]], i32 10)
126-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x half> [[ABT]], <4 x half> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
118+
; CHECK-NEXT: [[R:%.*]] = call <8 x half> @llvm.powi.v8f16.i32(<8 x half> [[A:%.*]], i32 10)
127119
; CHECK-NEXT: ret <8 x half> [[R]]
128120
;
129121
%ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -171,11 +163,7 @@ declare <4 x half> @othercall(<4 x half>)
171163

172164
define <8 x i32> @lrint(<8 x half> %a) {
173165
; CHECK-LABEL: @lrint(
174-
; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
175-
; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
176-
; CHECK-NEXT: [[ABT:%.*]] = call <4 x i32> @llvm.lrint.v4i32.v4f16(<4 x half> [[AT]])
177-
; CHECK-NEXT: [[ABB:%.*]] = call <4 x i32> @llvm.lrint.v4i32.v4f16(<4 x half> [[AB]])
178-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[ABT]], <4 x i32> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
166+
; CHECK-NEXT: [[R:%.*]] = call <8 x i32> @llvm.lrint.v8i32.v8f16(<8 x half> [[A:%.*]])
179167
; CHECK-NEXT: ret <8 x i32> [[R]]
180168
;
181169
%ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -427,15 +415,7 @@ define <8 x i8> @icmpsel(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) {
427415

428416
define <8 x half> @fma(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
429417
; CHECK-LABEL: @fma(
430-
; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
431-
; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
432-
; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
433-
; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x half> [[B]], <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
434-
; CHECK-NEXT: [[CB:%.*]] = shufflevector <8 x half> [[C:%.*]], <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
435-
; CHECK-NEXT: [[CT:%.*]] = shufflevector <8 x half> [[C]], <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
436-
; CHECK-NEXT: [[ABB:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[AB]], <4 x half> [[BB]], <4 x half> [[CB]])
437-
; CHECK-NEXT: [[ABT:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[AT]], <4 x half> [[BT]], <4 x half> [[CT]])
438-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x half> [[ABT]], <4 x half> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
418+
; CHECK-NEXT: [[R:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x half> [[C:%.*]])
439419
; CHECK-NEXT: ret <8 x half> [[R]]
440420
;
441421
%ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -487,19 +467,10 @@ define void @exttrunc(<8 x i32> %a, <8 x i32> %b, ptr %p) {
487467

488468
define <8 x i8> @intrinsics_minmax(<8 x i8> %a, <8 x i8> %b) {
489469
; CHECK-LABEL: @intrinsics_minmax(
490-
; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
491-
; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
492-
; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
493-
; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
494-
; CHECK-NEXT: [[ABT:%.*]] = call <4 x i8> @llvm.smin.v4i8(<4 x i8> [[AT]], <4 x i8> [[BT]])
495-
; CHECK-NEXT: [[ABB:%.*]] = call <4 x i8> @llvm.smin.v4i8(<4 x i8> [[AB]], <4 x i8> [[BB]])
496-
; CHECK-NEXT: [[ABT1:%.*]] = call <4 x i8> @llvm.smax.v4i8(<4 x i8> [[ABT]], <4 x i8> [[BT]])
497-
; CHECK-NEXT: [[ABB1:%.*]] = call <4 x i8> @llvm.smax.v4i8(<4 x i8> [[ABB]], <4 x i8> [[BB]])
498-
; CHECK-NEXT: [[ABT2:%.*]] = call <4 x i8> @llvm.umin.v4i8(<4 x i8> [[ABT1]], <4 x i8> [[BT]])
499-
; CHECK-NEXT: [[ABB2:%.*]] = call <4 x i8> @llvm.umin.v4i8(<4 x i8> [[ABB1]], <4 x i8> [[BB]])
500-
; CHECK-NEXT: [[ABT3:%.*]] = call <4 x i8> @llvm.umax.v4i8(<4 x i8> [[ABT2]], <4 x i8> [[BT]])
501-
; CHECK-NEXT: [[ABB3:%.*]] = call <4 x i8> @llvm.umax.v4i8(<4 x i8> [[ABB2]], <4 x i8> [[BB]])
502-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[ABT3]], <4 x i8> [[ABB3]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
470+
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i8> @llvm.smin.v8i8(<8 x i8> [[A:%.*]], <8 x i8> [[B:%.*]])
471+
; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i8> @llvm.smax.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[B]])
472+
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.umin.v8i8(<8 x i8> [[TMP2]], <8 x i8> [[B]])
473+
; CHECK-NEXT: [[R:%.*]] = call <8 x i8> @llvm.umax.v8i8(<8 x i8> [[TMP3]], <8 x i8> [[B]])
503474
; CHECK-NEXT: ret <8 x i8> [[R]]
504475
;
505476
%ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -520,19 +491,10 @@ define <8 x i8> @intrinsics_minmax(<8 x i8> %a, <8 x i8> %b) {
520491

521492
define <8 x i8> @intrinsics_addsat(<8 x i8> %a, <8 x i8> %b) {
522493
; CHECK-LABEL: @intrinsics_addsat(
523-
; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
524-
; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
525-
; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
526-
; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
527-
; CHECK-NEXT: [[ABT:%.*]] = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> [[AT]], <4 x i8> [[BT]])
528-
; CHECK-NEXT: [[ABB:%.*]] = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> [[AB]], <4 x i8> [[BB]])
529-
; CHECK-NEXT: [[ABT1:%.*]] = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> [[ABT]], <4 x i8> [[BT]])
530-
; CHECK-NEXT: [[ABB1:%.*]] = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> [[ABB]], <4 x i8> [[BB]])
531-
; CHECK-NEXT: [[ABT2:%.*]] = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> [[ABT1]], <4 x i8> [[BT]])
532-
; CHECK-NEXT: [[ABB2:%.*]] = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> [[ABB1]], <4 x i8> [[BB]])
533-
; CHECK-NEXT: [[ABT3:%.*]] = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> [[ABT2]], <4 x i8> [[BT]])
534-
; CHECK-NEXT: [[ABB3:%.*]] = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> [[ABB2]], <4 x i8> [[BB]])
535-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[ABT3]], <4 x i8> [[ABB3]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
494+
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> [[A:%.*]], <8 x i8> [[B:%.*]])
495+
; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[B]])
496+
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> [[TMP2]], <8 x i8> [[B]])
497+
; CHECK-NEXT: [[R:%.*]] = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> [[TMP3]], <8 x i8> [[B]])
536498
; CHECK-NEXT: ret <8 x i8> [[R]]
537499
;
538500
%ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>

0 commit comments

Comments
 (0)