
Commit e9ab434

[InstCombine] Pull vector reverse through intrinsics
This is the intrinsic version of #146349, and handles fabs as well as other intrinsics. It is largely a copy of InstCombinerImpl::foldShuffledIntrinsicOperands, but a bit simpler since we don't need to find a common mask. Creating a separate function seems cleaner than trying to shoehorn it into the existing one.
1 parent 1ae4ddc commit e9ab434
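
To illustrate the fold, here is a minimal IR sketch distilled from the binop_intrinsic_reverse test updated below (the function names and the declarations are added for the example only and are not part of the commit). The transform rewrites the first form into the second, trading one reverse per operand for a single reverse of the result:

declare <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)

; Before the fold: each operand of the intrinsic is reversed individually.
define <vscale x 4 x i32> @smax_of_reverses_before(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
  %a.rev = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
  %b.rev = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %b)
  %smax = call <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32> %a.rev, <vscale x 4 x i32> %b.rev)
  ret <vscale x 4 x i32> %smax
}

; After the fold: the intrinsic operates on the original operands and a single
; reverse is applied to the result.
define <vscale x 4 x i32> @smax_of_reverses_after(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
  %smax = call <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  %smax.rev = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %smax)
  ret <vscale x 4 x i32> %smax.rev
}

Because the intrinsic is elementwise and speculatable, applying it before or after the lane permutation gives the same result; the fold only fires when at least one reversed operand has a single use, so the instruction count does not grow.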

File tree

3 files changed: 64 additions & 33 deletions (+64 / -33 lines)

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 42 additions & 0 deletions
@@ -1456,6 +1456,45 @@ InstCombinerImpl::foldShuffledIntrinsicOperands(IntrinsicInst *II) {
   return new ShuffleVectorInst(NewIntrinsic, Mask);
 }
 
+/// If all arguments of the intrinsic are reverses, try to pull the reverse
+/// after the intrinsic.
+Value *InstCombinerImpl::foldReversedIntrinsicOperands(IntrinsicInst *II) {
+  if (!isTriviallyVectorizable(II->getIntrinsicID()) ||
+      !II->getCalledFunction()->isSpeculatable())
+    return nullptr;
+
+  // At least 1 operand must be a reverse with 1 use because we are creating 2
+  // instructions.
+  if (none_of(II->args(), [](Value *V) {
+        return match(V, m_OneUse(m_VecReverse(m_Value())));
+      }))
+    return nullptr;
+
+  Value *X;
+  Constant *C;
+  SmallVector<Value *> NewArgs;
+  for (Use &Arg : II->args()) {
+    if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
+                                           Arg.getOperandNo(), nullptr))
+      NewArgs.push_back(Arg);
+    else if (match(&Arg, m_VecReverse(m_Value(X))))
+      NewArgs.push_back(X);
+    else if (Value *Splat = getSplatValue(Arg))
+      NewArgs.push_back(Builder.CreateVectorSplat(
+          cast<VectorType>(Arg->getType())->getElementCount(), Splat));
+    else if (match(&Arg, m_ImmConstant(C)))
+      NewArgs.push_back(Builder.CreateVectorReverse(C));
+    else
+      return nullptr;
+  }
+
+  // intrinsic (reverse X), (reverse Y), ... --> reverse (intrinsic X, Y, ...)
+  Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
+  Instruction *NewIntrinsic = Builder.CreateIntrinsic(
+      II->getType(), II->getIntrinsicID(), NewArgs, FPI);
+  return Builder.CreateVectorReverse(NewIntrinsic);
+}
+
 /// Fold the following cases and accepts bswap and bitreverse intrinsics:
 ///   bswap(logic_op(bswap(x), y)) --> logic_op(x, bswap(y))
 ///   bswap(logic_op(bswap(x), bswap(y))) --> logic_op(x, y) (ignores multiuse)
@@ -3867,6 +3906,9 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
   if (Instruction *Shuf = foldShuffledIntrinsicOperands(II))
     return Shuf;
 
+  if (Value *Reverse = foldReversedIntrinsicOperands(II))
+    return replaceInstUsesWith(*II, Reverse);
+
   // Some intrinsics (like experimental_gc_statepoint) can be used in invoke
   // context, so it is handled in visitCallBase and we should trigger it.
   return visitCallBase(*II);

llvm/lib/Transforms/InstCombine/InstCombineInternal.h

Lines changed: 1 addition & 0 deletions
@@ -148,6 +148,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
   Instruction *foldItoFPtoI(CastInst &FI);
   Instruction *visitSelectInst(SelectInst &SI);
   Instruction *foldShuffledIntrinsicOperands(IntrinsicInst *II);
+  Value *foldReversedIntrinsicOperands(IntrinsicInst *II);
   Instruction *visitCallInst(CallInst &CI);
   Instruction *visitInvokeInst(InvokeInst &II);
   Instruction *visitCallBrInst(CallBrInst &CBI);

llvm/test/Transforms/InstCombine/vector-reverse.ll

Lines changed: 21 additions & 33 deletions
@@ -19,10 +19,9 @@ define <vscale x 4 x i32> @binop_reverse(<vscale x 4 x i32> %a, <vscale x 4 x i3
 
 define <vscale x 4 x i32> @binop_intrinsic_reverse(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: @binop_intrinsic_reverse(
-; CHECK-NEXT: [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
-; CHECK-NEXT: [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]])
-; CHECK-NEXT: [[ADD:%.*]] = call <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32> [[A_REV]], <vscale x 4 x i32> [[B_REV]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD]]
+; CHECK-NEXT: [[ADD:%.*]] = call <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32> [[A_REV:%.*]], <vscale x 4 x i32> [[B_REV:%.*]])
+; CHECK-NEXT: [[SMAX:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[ADD]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[SMAX]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
   %b.rev = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %b)
@@ -49,10 +48,10 @@ define <vscale x 4 x i32> @binop_reverse_1(<vscale x 4 x i32> %a, <vscale x 4 x
 ; %a.rev has multiple uses
 define <vscale x 4 x i32> @binop_intrinsic_reverse_1(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: @binop_intrinsic_reverse_1(
-; CHECK-NEXT: [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
 ; CHECK-NEXT: [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]])
-; CHECK-NEXT: call void @use_nxv4i32(<vscale x 4 x i32> [[A_REV]])
-; CHECK-NEXT: [[SMAX:%.*]] = call <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32> [[A_REV]], <vscale x 4 x i32> [[B_REV]])
+; CHECK-NEXT: call void @use_nxv4i32(<vscale x 4 x i32> [[B_REV]])
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32> [[B]], <vscale x 4 x i32> [[B1:%.*]])
+; CHECK-NEXT: [[SMAX:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[TMP1]])
 ; CHECK-NEXT: ret <vscale x 4 x i32> [[SMAX]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
@@ -233,9 +232,9 @@ define <vscale x 4 x float> @unop_reverse_1(<vscale x 4 x float> %a) {
 
 define <vscale x 4 x float> @unop_intrinsic_reverse(<vscale x 4 x float> %a) {
 ; CHECK-LABEL: @unop_intrinsic_reverse(
-; CHECK-NEXT: [[A_REV:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[A:%.*]])
-; CHECK-NEXT: [[NEG:%.*]] = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> [[A_REV]])
-; CHECK-NEXT: ret <vscale x 4 x float> [[NEG]]
+; CHECK-NEXT: [[NEG:%.*]] = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> [[A_REV:%.*]])
+; CHECK-NEXT: [[ABS:%.*]] = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[NEG]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[ABS]]
 ;
   %a.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
   %abs = call <vscale x 4 x float> @llvm.fabs(<vscale x 4 x float> %a.rev)
@@ -689,11 +688,8 @@ define <vscale x 4 x float> @reverse_binop_reverse(<vscale x 4 x float> %a, <vsc
 
 define <vscale x 4 x float> @reverse_binop_intrinsic_reverse(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: @reverse_binop_intrinsic_reverse(
-; CHECK-NEXT: [[A_REV:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[A:%.*]])
-; CHECK-NEXT: [[B_REV:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[B:%.*]])
-; CHECK-NEXT: [[ADD:%.*]] = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> [[A_REV]], <vscale x 4 x float> [[B_REV]])
-; CHECK-NEXT: [[MAXNUM_REV:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[ADD]])
-; CHECK-NEXT: ret <vscale x 4 x float> [[MAXNUM_REV]]
+; CHECK-NEXT: [[ADD:%.*]] = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> [[A_REV:%.*]], <vscale x 4 x float> [[B_REV:%.*]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[ADD]]
 ;
   %a.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
   %b.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %b)
@@ -734,12 +730,10 @@ define <vscale x 4 x float> @reverse_binop_reverse_splat_LHS(<vscale x 4 x float
 
 define <vscale x 4 x float> @reverse_binop_reverse_intrinsic_splat_RHS(<vscale x 4 x float> %a, float %b) {
 ; CHECK-LABEL: @reverse_binop_reverse_intrinsic_splat_RHS(
-; CHECK-NEXT: [[A_REV:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[A:%.*]])
 ; CHECK-NEXT: [[B_INSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B:%.*]], i64 0
 ; CHECK-NEXT: [[B_SPLAT:%.*]] = shufflevector <vscale x 4 x float> [[B_INSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT: [[MAXNUM:%.*]] = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> [[A_REV]], <vscale x 4 x float> [[B_SPLAT]])
-; CHECK-NEXT: [[MAXNUM_REV:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[MAXNUM]])
-; CHECK-NEXT: ret <vscale x 4 x float> [[MAXNUM_REV]]
+; CHECK-NEXT: [[MAXNUM:%.*]] = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> [[A_REV:%.*]], <vscale x 4 x float> [[B_SPLAT]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[MAXNUM]]
 ;
   %a.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
   %b.insert = insertelement <vscale x 4 x float> poison, float %b, i32 0
@@ -751,12 +745,10 @@ define <vscale x 4 x float> @reverse_binop_reverse_intrinsic_splat_RHS(<vscale x
 
 define <vscale x 4 x float> @reverse_binop_reverse_intrinsic_splat_LHS(<vscale x 4 x float> %a, float %b) {
 ; CHECK-LABEL: @reverse_binop_reverse_intrinsic_splat_LHS(
-; CHECK-NEXT: [[A_REV:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[A:%.*]])
 ; CHECK-NEXT: [[B_INSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B:%.*]], i64 0
 ; CHECK-NEXT: [[B_SPLAT:%.*]] = shufflevector <vscale x 4 x float> [[B_INSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT: [[MAXNUM:%.*]] = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> [[B_SPLAT]], <vscale x 4 x float> [[A_REV]])
-; CHECK-NEXT: [[MAXNUM_REV:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[MAXNUM]])
-; CHECK-NEXT: ret <vscale x 4 x float> [[MAXNUM_REV]]
+; CHECK-NEXT: [[MAXNUM:%.*]] = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> [[B_SPLAT]], <vscale x 4 x float> [[A_REV:%.*]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[MAXNUM]]
 ;
   %a.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
   %b.insert = insertelement <vscale x 4 x float> poison, float %b, i32 0
@@ -768,8 +760,8 @@ define <vscale x 4 x float> @reverse_binop_reverse_intrinsic_splat_LHS(<vscale x
 
 define <4 x float> @reverse_binop_reverse_intrinsic_constant_RHS(<4 x float> %a) {
 ; CHECK-LABEL: @reverse_binop_reverse_intrinsic_constant_RHS(
-; CHECK-NEXT: [[MAXNUM_REV1:%.*]] = tail call <4 x float> @llvm.vector.reverse.v4f32(<4 x float> [[MAXNUM1:%.*]])
-; CHECK-NEXT: [[MAXNUM:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[MAXNUM_REV1]], <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>)
+; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[A:%.*]], <4 x float> <float 3.000000e+00, float 2.000000e+00, float 1.000000e+00, float 0.000000e+00>)
+; CHECK-NEXT: [[MAXNUM:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT: [[MAXNUM_REV:%.*]] = tail call <4 x float> @llvm.vector.reverse.v4f32(<4 x float> [[MAXNUM]])
 ; CHECK-NEXT: ret <4 x float> [[MAXNUM_REV]]
 ;
@@ -817,10 +809,8 @@ define <vscale x 4 x float> @reverse_unop_reverse(<vscale x 4 x float> %a) {
 
 define <vscale x 4 x float> @reverse_unop_intrinsic_reverse(<vscale x 4 x float> %a) {
 ; CHECK-LABEL: @reverse_unop_intrinsic_reverse(
-; CHECK-NEXT: [[A_REV:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[A:%.*]])
-; CHECK-NEXT: [[ABS:%.*]] = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> [[A_REV]])
-; CHECK-NEXT: [[ABS_REV:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[ABS]])
-; CHECK-NEXT: ret <vscale x 4 x float> [[ABS_REV]]
+; CHECK-NEXT: [[ABS:%.*]] = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> [[A_REV:%.*]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[ABS]]
 ;
   %a.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
   %abs = call <vscale x 4 x float> @llvm.fabs(<vscale x 4 x float> %a.rev)
@@ -830,10 +820,8 @@ define <vscale x 4 x float> @reverse_unop_intrinsic_reverse(<vscale x 4 x float>
 
 define <vscale x 4 x float> @reverse_unop_intrinsic_reverse_scalar_arg(<vscale x 4 x float> %a, i32 %power) {
 ; CHECK-LABEL: @reverse_unop_intrinsic_reverse_scalar_arg(
-; CHECK-NEXT: [[A:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[A1:%.*]])
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.powi.nxv4f32.i32(<vscale x 4 x float> [[A]], i32 [[POWER:%.*]])
-; CHECK-NEXT: [[POWI_REV:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[TMP1]])
-; CHECK-NEXT: ret <vscale x 4 x float> [[POWI_REV]]
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.powi.nxv4f32.i32(<vscale x 4 x float> [[A:%.*]], i32 [[POWER:%.*]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
 ;
   %a.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
   %powi = call <vscale x 4 x float> @llvm.powi.nxv4f32(<vscale x 4 x float> %a.rev, i32 %power)
