Commit 04c6143

[InstCombine] Pull vector reverse through intrinsics (#146384)
This is the intrinsic version of #146349, and handles fabs as well as other intrinsics. It's largely a copy of InstCombinerImpl::foldShuffledIntrinsicOperands but a bit simpler since we don't need to find a common mask. Creating a separate function seems to be cleaner than trying to shoehorn it into the existing one.
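In IR terms, the fold looks like this (a minimal sketch distilled from the binop_intrinsic_reverse test added below):

  %a.rev = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
  %b.rev = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %b)
  %smax  = call <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32> %a.rev, <vscale x 4 x i32> %b.rev)

  ; becomes
  %smax = call <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  %rev  = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %smax)

The operand reverses fold away and a single reverse of the result remains, which is a net win whenever at least one operand reverse has no other users.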
1 parent 4dab0b2 · commit 04c6143

File tree

3 files changed: +188 lines, −0 lines

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 40 additions & 0 deletions
@@ -1456,6 +1456,43 @@ InstCombinerImpl::foldShuffledIntrinsicOperands(IntrinsicInst *II) {
   return new ShuffleVectorInst(NewIntrinsic, Mask);
 }
 
+/// If all arguments of the intrinsic are reverses, try to pull the reverse
+/// after the intrinsic.
+Value *InstCombinerImpl::foldReversedIntrinsicOperands(IntrinsicInst *II) {
+  if (!isTriviallyVectorizable(II->getIntrinsicID()))
+    return nullptr;
+
+  // At least 1 operand must be a reverse with 1 use because we are creating 2
+  // instructions.
+  if (none_of(II->args(), [](Value *V) {
+        return match(V, m_OneUse(m_VecReverse(m_Value())));
+      }))
+    return nullptr;
+
+  Value *X;
+  Constant *C;
+  SmallVector<Value *> NewArgs;
+  for (Use &Arg : II->args()) {
+    if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
+                                           Arg.getOperandNo(), nullptr))
+      NewArgs.push_back(Arg);
+    else if (match(&Arg, m_VecReverse(m_Value(X))))
+      NewArgs.push_back(X);
+    else if (isSplatValue(Arg))
+      NewArgs.push_back(Arg);
+    else if (match(&Arg, m_ImmConstant(C)))
+      NewArgs.push_back(Builder.CreateVectorReverse(C));
+    else
+      return nullptr;
+  }
+
+  // intrinsic (reverse X), (reverse Y), ... --> reverse (intrinsic X, Y, ...)
+  Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
+  Instruction *NewIntrinsic = Builder.CreateIntrinsic(
+      II->getType(), II->getIntrinsicID(), NewArgs, FPI);
+  return Builder.CreateVectorReverse(NewIntrinsic);
+}
+
 /// Fold the following cases and accepts bswap and bitreverse intrinsics:
 ///   bswap(logic_op(bswap(x), y)) --> logic_op(x, bswap(y))
 ///   bswap(logic_op(bswap(x), bswap(y))) --> logic_op(x, y) (ignores multiuse)
@@ -3866,6 +3903,9 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
   if (Instruction *Shuf = foldShuffledIntrinsicOperands(II))
     return Shuf;
 
+  if (Value *Reverse = foldReversedIntrinsicOperands(II))
+    return replaceInstUsesWith(*II, Reverse);
+
   // Some intrinsics (like experimental_gc_statepoint) can be used in invoke
   // context, so it is handled in visitCallBase and we should trigger it.
   return visitCallBase(*II);
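A note on the operand handling above: operands that are scalar for this intrinsic (per isVectorIntrinsicWithScalarOpAtArg) or splats are reverse-invariant and pass through unchanged, while an immediate constant vector is reversed up front so the fold still applies. A minimal sketch of the constant case, mirroring the reverse_binop_reverse_intrinsic_constant_RHS test below (value names illustrative):

  %a.rev  = call <4 x float> @llvm.vector.reverse.v4f32(<4 x float> %a)
  %maxnum = call <4 x float> @llvm.maxnum.v4f32(<4 x float> <float 0.0, float 1.0, float 2.0, float 3.0>, <4 x float> %a.rev)

  ; becomes (the constant operand is reversed at compile time)
  %tmp = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> <float 3.0, float 2.0, float 1.0, float 0.0>)
  %rev = call <4 x float> @llvm.vector.reverse.v4f32(<4 x float> %tmp)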

llvm/lib/Transforms/InstCombine/InstCombineInternal.h

Lines changed: 1 addition & 0 deletions
@@ -148,6 +148,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
   Instruction *foldItoFPtoI(CastInst &FI);
   Instruction *visitSelectInst(SelectInst &SI);
   Instruction *foldShuffledIntrinsicOperands(IntrinsicInst *II);
+  Value *foldReversedIntrinsicOperands(IntrinsicInst *II);
   Instruction *visitCallInst(CallInst &CI);
   Instruction *visitInvokeInst(InvokeInst &II);
   Instruction *visitCallBrInst(CallBrInst &CBI);

llvm/test/Transforms/InstCombine/vector-reverse.ll

Lines changed: 147 additions & 0 deletions
@@ -17,6 +17,18 @@ define <vscale x 4 x i32> @binop_reverse(<vscale x 4 x i32> %a, <vscale x 4 x i3
   ret <vscale x 4 x i32> %add
 }
 
+define <vscale x 4 x i32> @binop_intrinsic_reverse(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: @binop_intrinsic_reverse(
+; CHECK-NEXT:    [[ADD:%.*]] = call <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32> [[A_REV:%.*]], <vscale x 4 x i32> [[B_REV:%.*]])
+; CHECK-NEXT:    [[SMAX:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[ADD]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[SMAX]]
+;
+  %a.rev = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
+  %b.rev = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %b)
+  %smax = call <vscale x 4 x i32> @llvm.smax(<vscale x 4 x i32> %a.rev, <vscale x 4 x i32> %b.rev)
+  ret <vscale x 4 x i32> %smax
+}
+
 ; %a.rev has multiple uses
 define <vscale x 4 x i32> @binop_reverse_1(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: @binop_reverse_1(
@@ -33,6 +45,22 @@ define <vscale x 4 x i32> @binop_reverse_1(<vscale x 4 x i32> %a, <vscale x 4 x
   ret <vscale x 4 x i32> %add
 }
 
+; %a.rev has multiple uses
+define <vscale x 4 x i32> @binop_intrinsic_reverse_1(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: @binop_intrinsic_reverse_1(
+; CHECK-NEXT:    [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]])
+; CHECK-NEXT:    call void @use_nxv4i32(<vscale x 4 x i32> [[B_REV]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32> [[B]], <vscale x 4 x i32> [[B1:%.*]])
+; CHECK-NEXT:    [[SMAX:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[TMP1]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[SMAX]]
+;
+  %a.rev = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
+  %b.rev = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %b)
+  call void @use_nxv4i32(<vscale x 4 x i32> %a.rev)
+  %smax = call <vscale x 4 x i32> @llvm.smax(<vscale x 4 x i32> %a.rev, <vscale x 4 x i32> %b.rev)
+  ret <vscale x 4 x i32> %smax
+}
+
 ; %b.rev has multiple uses
 define <vscale x 4 x i32> @binop_reverse_2(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: @binop_reverse_2(
@@ -67,6 +95,24 @@ define <vscale x 4 x i32> @binop_reverse_3(<vscale x 4 x i32> %a, <vscale x 4 x
   ret <vscale x 4 x i32> %add
 }
 
+; %a.rev and %b.rev have multiple uses
+define <vscale x 4 x i32> @binop_intrinsic_reverse_3(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: @binop_intrinsic_reverse_3(
+; CHECK-NEXT:    [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
+; CHECK-NEXT:    [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]])
+; CHECK-NEXT:    call void @use_nxv4i32(<vscale x 4 x i32> [[A_REV]])
+; CHECK-NEXT:    call void @use_nxv4i32(<vscale x 4 x i32> [[B_REV]])
+; CHECK-NEXT:    [[SMAX:%.*]] = call <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32> [[A_REV]], <vscale x 4 x i32> [[B_REV]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[SMAX]]
+;
+  %a.rev = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
+  %b.rev = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %b)
+  call void @use_nxv4i32(<vscale x 4 x i32> %a.rev)
+  call void @use_nxv4i32(<vscale x 4 x i32> %b.rev)
+  %smax = call <vscale x 4 x i32> @llvm.smax(<vscale x 4 x i32> %a.rev, <vscale x 4 x i32> %b.rev)
+  ret <vscale x 4 x i32> %smax
+}
+
 ; %a.rev used as both operands
 define <vscale x 4 x i32> @binop_reverse_4(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: @binop_reverse_4(
@@ -184,6 +230,17 @@ define <vscale x 4 x float> @unop_reverse_1(<vscale x 4 x float> %a) {
   ret <vscale x 4 x float> %neg
 }
 
+define <vscale x 4 x float> @unop_intrinsic_reverse(<vscale x 4 x float> %a) {
+; CHECK-LABEL: @unop_intrinsic_reverse(
+; CHECK-NEXT:    [[NEG:%.*]] = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> [[A_REV:%.*]])
+; CHECK-NEXT:    [[ABS:%.*]] = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[NEG]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[ABS]]
+;
+  %a.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
+  %abs = call <vscale x 4 x float> @llvm.fabs(<vscale x 4 x float> %a.rev)
+  ret <vscale x 4 x float> %abs
+}
+
 define <vscale x 4 x i1> @icmp_reverse(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: @icmp_reverse(
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq <vscale x 4 x i32> [[A:%.*]], [[B:%.*]]
@@ -629,6 +686,18 @@ define <vscale x 4 x float> @reverse_binop_reverse(<vscale x 4 x float> %a, <vsc
   ret <vscale x 4 x float> %add.rev
 }
 
+define <vscale x 4 x float> @reverse_binop_intrinsic_reverse(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: @reverse_binop_intrinsic_reverse(
+; CHECK-NEXT:    [[ADD:%.*]] = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> [[A_REV:%.*]], <vscale x 4 x float> [[B_REV:%.*]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[ADD]]
+;
+  %a.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
+  %b.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %b)
+  %maxnum = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> %a.rev, <vscale x 4 x float> %b.rev)
+  %maxnum.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %maxnum)
+  ret <vscale x 4 x float> %maxnum.rev
+}
+
 define <vscale x 4 x float> @reverse_binop_reverse_splat_RHS(<vscale x 4 x float> %a, float %b) {
 ; CHECK-LABEL: @reverse_binop_reverse_splat_RHS(
 ; CHECK-NEXT:    [[B_INSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B:%.*]], i64 0
@@ -659,6 +728,63 @@ define <vscale x 4 x float> @reverse_binop_reverse_splat_LHS(<vscale x 4 x float
   ret <vscale x 4 x float> %div.rev
 }
 
+define <vscale x 4 x float> @reverse_binop_reverse_intrinsic_splat_RHS(<vscale x 4 x float> %a, float %b) {
+; CHECK-LABEL: @reverse_binop_reverse_intrinsic_splat_RHS(
+; CHECK-NEXT:    [[B_INSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B:%.*]], i64 0
+; CHECK-NEXT:    [[B_SPLAT:%.*]] = shufflevector <vscale x 4 x float> [[B_INSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT:    [[MAXNUM:%.*]] = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> [[A_REV:%.*]], <vscale x 4 x float> [[B_SPLAT]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[MAXNUM]]
+;
+  %a.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
+  %b.insert = insertelement <vscale x 4 x float> poison, float %b, i32 0
+  %b.splat = shufflevector <vscale x 4 x float> %b.insert, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+  %maxnum = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> %a.rev, <vscale x 4 x float> %b.splat)
+  %maxnum.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %maxnum)
+  ret <vscale x 4 x float> %maxnum.rev
+}
+
+define <vscale x 4 x float> @reverse_binop_reverse_intrinsic_splat_LHS(<vscale x 4 x float> %a, float %b) {
+; CHECK-LABEL: @reverse_binop_reverse_intrinsic_splat_LHS(
+; CHECK-NEXT:    [[B_INSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B:%.*]], i64 0
+; CHECK-NEXT:    [[B_SPLAT:%.*]] = shufflevector <vscale x 4 x float> [[B_INSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT:    [[MAXNUM:%.*]] = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> [[B_SPLAT]], <vscale x 4 x float> [[A_REV:%.*]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[MAXNUM]]
+;
+  %a.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
+  %b.insert = insertelement <vscale x 4 x float> poison, float %b, i32 0
+  %b.splat = shufflevector <vscale x 4 x float> %b.insert, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+  %maxnum = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> %b.splat, <vscale x 4 x float> %a.rev)
+  %maxnum.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %maxnum)
+  ret <vscale x 4 x float> %maxnum.rev
+}
+
+; Negative test: Make sure that splats with poison aren't considered splats
+define <4 x float> @reverse_binop_reverse_intrinsic_splat_with_poison(<4 x float> %a) {
+; CHECK-LABEL: @reverse_binop_reverse_intrinsic_splat_with_poison(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[A:%.*]], <4 x float> <float 1.000000e+00, float poison, float 1.000000e+00, float 1.000000e+00>)
+; CHECK-NEXT:    [[MAXNUM:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[MAXNUM_REV:%.*]] = tail call <4 x float> @llvm.vector.reverse.v4f32(<4 x float> [[MAXNUM]])
+; CHECK-NEXT:    ret <4 x float> [[MAXNUM_REV]]
+;
+  %a.rev = tail call <4 x float> @llvm.vector.reverse(<4 x float> %a)
+  %maxnum = call <4 x float> @llvm.maxnum.v4f32(<4 x float> <float 1.0, float 1.0, float poison, float 1.0>, <4 x float> %a.rev)
+  %maxnum.rev = tail call <4 x float> @llvm.vector.reverse(<4 x float> %maxnum)
+  ret <4 x float> %maxnum.rev
+}
+
+define <4 x float> @reverse_binop_reverse_intrinsic_constant_RHS(<4 x float> %a) {
+; CHECK-LABEL: @reverse_binop_reverse_intrinsic_constant_RHS(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[A:%.*]], <4 x float> <float 3.000000e+00, float 2.000000e+00, float 1.000000e+00, float 0.000000e+00>)
+; CHECK-NEXT:    [[MAXNUM:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[MAXNUM_REV:%.*]] = tail call <4 x float> @llvm.vector.reverse.v4f32(<4 x float> [[MAXNUM]])
+; CHECK-NEXT:    ret <4 x float> [[MAXNUM_REV]]
+;
+  %a.rev = tail call <4 x float> @llvm.vector.reverse(<4 x float> %a)
+  %maxnum = call <4 x float> @llvm.maxnum.v4f32(<4 x float> <float 0.0, float 1.0, float 2.0, float 3.0>, <4 x float> %a.rev)
+  %maxnum.rev = tail call <4 x float> @llvm.vector.reverse(<4 x float> %maxnum)
+  ret <4 x float> %maxnum.rev
+}
+
 define <vscale x 4 x i1> @reverse_fcmp_reverse(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: @reverse_fcmp_reverse(
 ; CHECK-NEXT:    [[CMP1:%.*]] = fcmp fast olt <vscale x 4 x float> [[A:%.*]], [[B:%.*]]
@@ -695,6 +821,27 @@ define <vscale x 4 x float> @reverse_unop_reverse(<vscale x 4 x float> %a) {
   ret <vscale x 4 x float> %neg.rev
 }
 
+define <vscale x 4 x float> @reverse_unop_intrinsic_reverse(<vscale x 4 x float> %a) {
+; CHECK-LABEL: @reverse_unop_intrinsic_reverse(
+; CHECK-NEXT:    [[ABS:%.*]] = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> [[A_REV:%.*]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[ABS]]
+;
+  %a.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
+  %abs = call <vscale x 4 x float> @llvm.fabs(<vscale x 4 x float> %a.rev)
+  %abs.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %abs)
+  ret <vscale x 4 x float> %abs.rev
+}
+
+define <vscale x 4 x float> @reverse_unop_intrinsic_reverse_scalar_arg(<vscale x 4 x float> %a, i32 %power) {
+; CHECK-LABEL: @reverse_unop_intrinsic_reverse_scalar_arg(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.powi.nxv4f32.i32(<vscale x 4 x float> [[A:%.*]], i32 [[POWER:%.*]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %a.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
+  %powi = call <vscale x 4 x float> @llvm.powi.nxv4f32(<vscale x 4 x float> %a.rev, i32 %power)
+  %powi.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %powi)
+  ret <vscale x 4 x float> %powi.rev
+}
 
 declare void @use_nxv4i1(<vscale x 4 x i1>)
 declare void @use_nxv4i32(<vscale x 4 x i32>)
