Skip to content

Commit 53dd72a

Browse files
[SVE][CodeGen] Lower SDIV & UDIV to SVE intrinsics
Summary: This patch maps IR operations for sdiv & udiv to the @llvm.aarch64.sve.[s|u]div intrinsics. A ptrue must be created during lowering as the div instructions have only a predicated form. Patch contains changes by Andrzej Warzynski. Reviewers: sdesmalen, c-rhodes, efriedma, cameron.mcinally, rengolin Reviewed By: efriedma Subscribers: tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, andwar, cfe-commits, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D78569
1 parent 823e2a6 commit 53dd72a

File tree

4 files changed

+94
-3
lines changed

4 files changed

+94
-3
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -883,8 +883,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
883883
// splat of 0 or undef) once vector selects supported in SVE codegen. See
884884
// D68877 for more details.
885885
for (MVT VT : MVT::integer_scalable_vector_valuetypes()) {
886-
if (isTypeLegal(VT))
886+
if (isTypeLegal(VT)) {
887887
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
888+
setOperationAction(ISD::SDIV, VT, Custom);
889+
setOperationAction(ISD::UDIV, VT, Custom);
890+
}
888891
}
889892
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
890893
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
@@ -1280,6 +1283,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
12801283
case AArch64ISD::CSINC: return "AArch64ISD::CSINC";
12811284
case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
12821285
case AArch64ISD::TLSDESC_CALLSEQ: return "AArch64ISD::TLSDESC_CALLSEQ";
1286+
case AArch64ISD::SDIV_PRED: return "AArch64ISD::SDIV_PRED";
1287+
case AArch64ISD::UDIV_PRED: return "AArch64ISD::UDIV_PRED";
12831288
case AArch64ISD::ADC: return "AArch64ISD::ADC";
12841289
case AArch64ISD::SBC: return "AArch64ISD::SBC";
12851290
case AArch64ISD::ADDS: return "AArch64ISD::ADDS";
@@ -3342,6 +3347,10 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
33423347
return LowerSPLAT_VECTOR(Op, DAG);
33433348
case ISD::EXTRACT_SUBVECTOR:
33443349
return LowerEXTRACT_SUBVECTOR(Op, DAG);
3350+
case ISD::SDIV:
3351+
return LowerDIV(Op, DAG, AArch64ISD::SDIV_PRED);
3352+
case ISD::UDIV:
3353+
return LowerDIV(Op, DAG, AArch64ISD::UDIV_PRED);
33453354
case ISD::SRA:
33463355
case ISD::SRL:
33473356
case ISD::SHL:
@@ -7648,6 +7657,23 @@ SDValue AArch64TargetLowering::LowerDUPQLane(SDValue Op,
76487657
return DAG.getNode(ISD::BITCAST, DL, VT, TBL);
76497658
}
76507659

7660+
// Custom lowering for a scalable-vector integer division node.
//
// Rewrites Op (reached from the ISD::SDIV / ISD::UDIV cases of
// LowerOperation) into the predicated target node NewOp, passing a freshly
// built "all" predicate as the governing mask so that the division is
// requested for every lane.
//
//   Op    - the division node; operands 0 and 1 are forwarded unchanged as
//           the two data operands of the new node.
//   DAG   - the SelectionDAG in which the new nodes are created.
//   NewOp - the target opcode to emit (callers pass AArch64ISD::SDIV_PRED or
//           AArch64ISD::UDIV_PRED).
//
// Returns a node of the same value type as Op with operands
// (Mask, Op.getOperand(0), Op.getOperand(1)).
//
// NOTE(review): the accompanying tests only exercise 32- and 64-bit element
// types; confirm that every vector type routed here by the Custom action has
// a matching instruction pattern.
SDValue AArch64TargetLowering::LowerDIV(SDValue Op,
7661+
SelectionDAG &DAG,
7662+
unsigned NewOp) const {
7663+
EVT VT = Op.getValueType();
7664+
SDLoc DL(Op);
7665+
7666+
// Both inputs must be scalable vectors; nothing else is handled here.
assert(Op.getOperand(0).getValueType().isScalableVector() &&
7667+
Op.getOperand(1).getValueType().isScalableVector() &&
7668+
"Only scalable vectors are supported");
7669+
7670+
// Predicate type: an i1 vector with the same element count as VT. The
// trailing `true` presumably marks the type as scalable - TODO confirm
// against EVT::getVectorVT.
auto PredTy = VT.getVectorVT(*DAG.getContext(), MVT::i1,
7671+
VT.getVectorNumElements(), true);
7672+
// The division only exists in predicated form, so build a ptrue with the
// "all" pattern to serve as the governing predicate.
SDValue Mask = getPTrue(DAG, DL, PredTy, AArch64SVEPredPattern::all);
7673+
7674+
// Operand order of the predicated node: mask first, then the two inputs.
return DAG.getNode(NewOp, DL, VT, Mask, Op.getOperand(0), Op.getOperand(1));
7675+
}
7676+
76517677
static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits,
76527678
APInt &UndefBits) {
76537679
EVT VT = BVN->getValueType(0);
@@ -11359,6 +11385,12 @@ static SDValue performIntrinsicCombine(SDNode *N,
1135911385
N->getOperand(1));
1136011386
case Intrinsic::aarch64_sve_ext:
1136111387
return LowerSVEIntrinsicEXT(N, DAG);
11388+
case Intrinsic::aarch64_sve_sdiv:
11389+
return DAG.getNode(AArch64ISD::SDIV_PRED, SDLoc(N), N->getValueType(0),
11390+
N->getOperand(1), N->getOperand(2), N->getOperand(3));
11391+
case Intrinsic::aarch64_sve_udiv:
11392+
return DAG.getNode(AArch64ISD::UDIV_PRED, SDLoc(N), N->getValueType(0),
11393+
N->getOperand(1), N->getOperand(2), N->getOperand(3));
1136211394
case Intrinsic::aarch64_sve_sel:
1136311395
return DAG.getNode(ISD::VSELECT, SDLoc(N), N->getValueType(0),
1136411396
N->getOperand(1), N->getOperand(2), N->getOperand(3));

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,10 @@ enum NodeType : unsigned {
5252
ADC,
5353
SBC, // adc, sbc instructions
5454

55+
// Arithmetic instructions
56+
SDIV_PRED,
57+
UDIV_PRED,
58+
5559
// Arithmetic instructions which write flags.
5660
ADDS,
5761
SUBS,
@@ -781,6 +785,8 @@ class AArch64TargetLowering : public TargetLowering {
781785
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
782786
SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
783787
SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
788+
SDValue LowerDIV(SDValue Op, SelectionDAG &DAG,
789+
unsigned NewOp) const;
784790
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
785791
SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
786792
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,14 @@ def AArch64andv_pred : SDNode<"AArch64ISD::ANDV_PRED", SDT_AArch64Reduce>
145145
def AArch64lasta : SDNode<"AArch64ISD::LASTA", SDT_AArch64Reduce>;
146146
def AArch64lastb : SDNode<"AArch64ISD::LASTB", SDT_AArch64Reduce>;
147147

148+
// Type profile shared by the predicated divide nodes: one result and three
// operands, all of which must be vectors. Operand 1 is the governing
// predicate (i1 elements); operands 2 and 3 are the data inputs and must have
// the same type. Note that the result is only constrained to be a vector; it
// is not tied to the type of the data operands by this profile.
def SDT_AArch64DIV : SDTypeProfile<1, 3, [
149+
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>,
150+
SDTCVecEltisVT<1,i1>, SDTCisSameAs<2,3>
151+
]>;
152+
153+
// Selection DAG nodes for the predicated signed and unsigned divide; these
// names are what the SDIV_ZPmZ / UDIV_ZPmZ instruction definitions match on.
def AArch64sdiv_pred : SDNode<"AArch64ISD::SDIV_PRED", SDT_AArch64DIV>;
154+
def AArch64udiv_pred : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64DIV>;
155+
148156
def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<3>]>;
149157
def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithInit>;
150158
def AArch64clastb_n : SDNode<"AArch64ISD::CLASTB_N", SDT_AArch64ReduceWithInit>;
@@ -239,8 +247,8 @@ let Predicates = [HasSVE] in {
239247
def : Pat<(mul nxv2i64:$Op1, nxv2i64:$Op2),
240248
(MUL_ZPmZ_D (PTRUE_D 31), $Op1, $Op2)>;
241249

242-
defm SDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b100, "sdiv", int_aarch64_sve_sdiv>;
243-
defm UDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b101, "udiv", int_aarch64_sve_udiv>;
250+
defm SDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b100, "sdiv", AArch64sdiv_pred>;
251+
defm UDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b101, "udiv", AArch64udiv_pred>;
244252
defm SDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b110, "sdivr", int_aarch64_sve_sdivr>;
245253
defm UDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b111, "udivr", int_aarch64_sve_udivr>;
246254

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
2+
3+
;
4+
; SDIV
5+
;
6+
7+
; Signed divide of <vscale x 4 x i32>: expects an all-true .s predicate and a
; single predicated sdiv on z0/z1 before the return.
define <vscale x 4 x i32> @sdiv_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
8+
; CHECK-LABEL: @sdiv_i32
9+
; CHECK-DAG: ptrue p0.s
10+
; CHECK-DAG: sdiv z0.s, p0/m, z0.s, z1.s
11+
; CHECK-NEXT: ret
12+
%div = sdiv <vscale x 4 x i32> %a, %b
13+
ret <vscale x 4 x i32> %div
14+
}
15+
16+
; Signed divide of <vscale x 2 x i64>: expects an all-true .d predicate and a
; single predicated sdiv on z0/z1 before the return.
define <vscale x 2 x i64> @sdiv_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
17+
; CHECK-LABEL: @sdiv_i64
18+
; CHECK-DAG: ptrue p0.d
19+
; CHECK-DAG: sdiv z0.d, p0/m, z0.d, z1.d
20+
; CHECK-NEXT: ret
21+
%div = sdiv <vscale x 2 x i64> %a, %b
22+
ret <vscale x 2 x i64> %div
23+
}
24+
25+
;
26+
; UDIV
27+
;
28+
29+
; Unsigned divide of <vscale x 4 x i32>: expects an all-true .s predicate and
; a single predicated udiv on z0/z1 before the return.
define <vscale x 4 x i32> @udiv_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
30+
; CHECK-LABEL: @udiv_i32
31+
; CHECK-DAG: ptrue p0.s
32+
; CHECK-DAG: udiv z0.s, p0/m, z0.s, z1.s
33+
; CHECK-NEXT: ret
34+
%div = udiv <vscale x 4 x i32> %a, %b
35+
ret <vscale x 4 x i32> %div
36+
}
37+
38+
; Unsigned divide of <vscale x 2 x i64>: expects an all-true .d predicate and
; a single predicated udiv on z0/z1 before the return.
define <vscale x 2 x i64> @udiv_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
39+
; CHECK-LABEL: @udiv_i64
40+
; CHECK-DAG: ptrue p0.d
41+
; CHECK-DAG: udiv z0.d, p0/m, z0.d, z1.d
42+
; CHECK-NEXT: ret
43+
%div = udiv <vscale x 2 x i64> %a, %b
44+
ret <vscale x 2 x i64> %div
45+
}

0 commit comments

Comments
 (0)