Skip to content

Commit fb47725

Browse files
committed
[AArch64][SVE] Instcombine SDIV to ASRD
Instcombine SDIV to ASRD when the third operand of SDIV is a splat of a power of 2; for a negated power of 2 the ASRD result is additionally negated. Differential Revision: https://reviews.llvm.org/D115448
1 parent c135248 commit fb47725

File tree

2 files changed

+111
-0
lines changed

2 files changed

+111
-0
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1028,6 +1028,40 @@ static Optional<Instruction *> instCombineST1ScatterIndex(InstCombiner &IC,
10281028
return None;
10291029
}
10301030

1031+
/// Try to rewrite a predicated SVE signed divide (aarch64_sve_sdiv) whose
/// divisor is a splat of a constant (negated) power of two.
///
/// Operands of \p II are read as (predicate, dividend, divisor).
///
///   * divisor ==  2^k, k >= 1  ->  asrd(pred, dividend, k)
///   * divisor == -2^k, k >= 1  ->  neg(asrd(pred, dividend, k)) under pred
///   * divisor ==  1            ->  dividend
///   * anything else (including 0, -1 and non-splat / non-constant divisors)
///     is left untouched.
///
/// \param IC  the InstCombiner used to replace the uses of \p II.
/// \param II  the aarch64_sve_sdiv intrinsic call being visited.
/// \returns the result of IC.replaceInstUsesWith when a rewrite happened,
///          None otherwise.
static Optional<Instruction *> instCombineSVESDIV(InstCombiner &IC,
                                                  IntrinsicInst &II) {
  Value *Pred = II.getOperand(0);
  Value *Vec = II.getOperand(1);
  Value *DivVec = II.getOperand(2);

  // Only a divisor that is the same constant integer in every lane qualifies.
  auto *SplatConstantInt =
      dyn_cast_or_null<ConstantInt>(getSplatValue(DivVec));
  if (!SplatConstantInt)
    return None;

  APInt Divisor = SplatConstantInt->getValue();

  // Classify by sign first. APInt::isPowerOf2() looks at the raw (unsigned)
  // bit pattern, so the minimum signed value (only the sign bit set) would
  // otherwise be mistaken for the positive power of two 2^(BitWidth-1) and
  // the quotient would come out with the wrong sign.
  const bool NegateResult = Divisor.isNegative();
  const bool IsSupported =
      NegateResult ? Divisor.isNegatedPowerOf2() : Divisor.isPowerOf2();
  if (!IsSupported)
    return None;

  // Work on the magnitude. Negating the minimum signed value leaves the bit
  // pattern unchanged, and logBase2() then correctly yields BitWidth-1.
  if (NegateResult)
    Divisor.negate();
  const unsigned ShiftAmount = Divisor.logBase2();

  // A shift amount of zero means the divisor was +1 or -1. ASRD cannot encode
  // a shift of zero, so never emit it for these divisors.
  if (ShiftAmount == 0) {
    // Leave x / -1 alone rather than guessing at an equivalent sequence.
    if (NegateResult)
      return None;
    // x / 1 == x. Like the ASRD rewrite below, this relies on the inactive
    // lanes of the divide being taken from the dividend operand.
    return IC.replaceInstUsesWith(II, Vec);
  }

  IRBuilder<> Builder(II.getContext());
  Builder.SetInsertPoint(&II);

  Constant *DivisorLog2 = ConstantInt::get(Builder.getInt32Ty(), ShiftAmount);
  Value *Result = Builder.CreateIntrinsic(
      Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});

  // For a negative divisor, negate the quotient in the active lanes. The ASRD
  // result is also passed as the first operand of the negate.
  if (NegateResult)
    Result = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_neg,
                                     {Result->getType()},
                                     {Result, Pred, Result});

  return IC.replaceInstUsesWith(II, Result);
}
1064+
10311065
Optional<Instruction *>
10321066
AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
10331067
IntrinsicInst &II) const {
@@ -1088,6 +1122,8 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
10881122
return instCombineSVELD1(IC, II, DL);
10891123
case Intrinsic::aarch64_sve_st1:
10901124
return instCombineSVEST1(IC, II, DL);
1125+
case Intrinsic::aarch64_sve_sdiv:
1126+
return instCombineSVESDIV(IC, II);
10911127
}
10921128

10931129
return None;
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -S -instcombine < %s | FileCheck %s
3+
4+
target triple = "aarch64-unknown-linux-gnu"
5+
6+
; Divisor is a splat of 8388608 (2^23) on <vscale x 4 x i32>: the CHECK lines
; expect the sdiv call to be replaced by an asrd call with shift immediate 23.
define <vscale x 4 x i32> @sdiv_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg) #0 {
7+
; CHECK-LABEL: @sdiv_i32(
8+
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.asrd.nxv4i32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]], i32 23)
9+
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
10+
;
11+
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 8388608, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer))
12+
ret <vscale x 4 x i32> %out
13+
}
14+
15+
; Divisor is a splat of -8388608 (-(2^23)) on <vscale x 4 x i32>: the CHECK
; lines expect an asrd call with shift immediate 23 followed by an sve.neg call
; that uses the same predicate and takes the asrd result for both data operands.
define <vscale x 4 x i32> @sdiv_i32_neg(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg) #0 {
16+
; CHECK-LABEL: @sdiv_i32_neg(
17+
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.asrd.nxv4i32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]], i32 23)
18+
; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.neg.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i1> [[PG]], <vscale x 4 x i32> [[TMP1]])
19+
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
20+
;
21+
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 -8388608, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer))
22+
ret <vscale x 4 x i32> %out
23+
}
24+
25+
; 64-bit element variant: divisor is a splat of 8388608 (2^23) on
; <vscale x 2 x i64>. The CHECK lines expect the sdiv call to be replaced by an
; asrd call whose shift immediate is still an i32 with value 23.
define <vscale x 2 x i64> @sdiv_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg) #0 {
26+
; CHECK-LABEL: @sdiv_i64(
27+
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.asrd.nxv2i64(<vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x i64> [[A:%.*]], i32 23)
28+
; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
29+
;
30+
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.sdiv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 8388608, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer))
31+
ret <vscale x 2 x i64> %out
32+
}
33+
34+
; 64-bit element variant with a negative divisor: splat of -8388608 (-(2^23))
; on <vscale x 2 x i64>. The CHECK lines expect an asrd call with shift
; immediate 23 followed by an sve.neg call under the same predicate.
define <vscale x 2 x i64> @sdiv_i64_neg(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg) #0 {
35+
; CHECK-LABEL: @sdiv_i64_neg(
36+
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.asrd.nxv2i64(<vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x i64> [[A:%.*]], i32 23)
37+
; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.neg.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> [[TMP1]])
38+
; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
39+
;
40+
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.sdiv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 -8388608, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer))
41+
ret <vscale x 2 x i64> %out
42+
}
43+
44+
; Negative test: divisor is a splat of 8388607 (2^23 - 1), which is not a power
; of two. The CHECK lines expect the sdiv call to be left in place.
define <vscale x 4 x i32> @sdiv_i32_not_base2(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg) #0 {
45+
; CHECK-LABEL: @sdiv_i32_not_base2(
46+
; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 8388607, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer))
47+
; CHECK-NEXT: ret <vscale x 4 x i32> [[OUT]]
48+
;
49+
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 8388607, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer))
50+
ret <vscale x 4 x i32> %out
51+
}
52+
53+
; Negative test: divisor is a splat of -8388607, whose magnitude is not a power
; of two. The CHECK lines expect the sdiv call to be left in place.
define <vscale x 4 x i32> @sdiv_i32_not_base2_neg(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg) #0 {
54+
; CHECK-LABEL: @sdiv_i32_not_base2_neg(
55+
; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 -8388607, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer))
56+
; CHECK-NEXT: ret <vscale x 4 x i32> [[OUT]]
57+
;
58+
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 -8388607, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer))
59+
ret <vscale x 4 x i32> %out
60+
}
61+
62+
; Negative test for a zero divisor (despite the "not_zero" name, the divisor
; here IS a splat of 0). The CHECK lines expect the sdiv call to be left in
; place, with the divisor operand matched as zeroinitializer.
define <vscale x 4 x i32> @sdiv_i32_not_zero(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg) #0 {
63+
; CHECK-LABEL: @sdiv_i32_not_zero(
64+
; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> zeroinitializer)
65+
; CHECK-NEXT: ret <vscale x 4 x i32> [[OUT]]
66+
;
67+
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 0, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer))
68+
ret <vscale x 4 x i32> %out
69+
}
70+
71+
72+
declare <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
73+
declare <vscale x 2 x i64> @llvm.aarch64.sve.sdiv.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
74+
75+
attributes #0 = { "target-features"="+sve" }

0 commit comments

Comments
 (0)